Bug 1237023: Cherry-pick VP9 packetization/jitter-buffer/encoder code from Webrtc.org 48 r=pkerr

--HG--
extra : commitid : 5RePHoOVY7a
Randell Jesup 2016-01-24 23:13:26 -05:00
parent f90e2e16b2
commit 569565ac43
21 changed files with 3374 additions and 163 deletions

View File

@@ -15,6 +15,7 @@
#include <string.h> // memcpy
#include <algorithm>
#include <limits>
#include "webrtc/base/constructormagic.h"
#include "webrtc/common_types.h"
@@ -31,8 +32,16 @@ struct RTPAudioHeader {
};
const int16_t kNoPictureId = -1;
const int16_t kMaxOneBytePictureId = 0x7F; // 7 bits
const int16_t kMaxTwoBytePictureId = 0x7FFF; // 15 bits
const int16_t kNoTl0PicIdx = -1;
const uint8_t kNoTemporalIdx = 0xFF;
const uint8_t kNoSpatialIdx = 0xFF;
const uint8_t kNoGofIdx = 0xFF;
const uint8_t kNumVp9Buffers = 8;
const size_t kMaxVp9RefPics = 3;
const size_t kMaxVp9FramesInGof = 0xFF; // 8 bits
const size_t kMaxVp9NumberOfSpatialLayers = 8;
const int kNoKeyIdx = -1;
struct RTPVideoHeaderVP8 {
@@ -61,37 +70,164 @@ struct RTPVideoHeaderVP8 {
// in a VP8 partition. Otherwise false
};
enum TemporalStructureMode {
kTemporalStructureMode1, // 1 temporal layer structure - i.e., IPPP...
kTemporalStructureMode2, // 2 temporal layers 0-1-0-1...
kTemporalStructureMode3 // 3 temporal layers 0-2-1-2-0-2-1-2...
};
struct GofInfoVP9 {
void SetGofInfoVP9(TemporalStructureMode tm) {
switch (tm) {
case kTemporalStructureMode1:
num_frames_in_gof = 1;
temporal_idx[0] = 0;
temporal_up_switch[0] = false;
num_ref_pics[0] = 1;
pid_diff[0][0] = 1;
break;
case kTemporalStructureMode2:
num_frames_in_gof = 2;
temporal_idx[0] = 0;
temporal_up_switch[0] = false;
num_ref_pics[0] = 1;
pid_diff[0][0] = 2;
temporal_idx[1] = 1;
temporal_up_switch[1] = true;
num_ref_pics[1] = 1;
pid_diff[1][0] = 1;
break;
case kTemporalStructureMode3:
num_frames_in_gof = 4;
temporal_idx[0] = 0;
temporal_up_switch[0] = false;
num_ref_pics[0] = 1;
pid_diff[0][0] = 4;
temporal_idx[1] = 2;
temporal_up_switch[1] = true;
num_ref_pics[1] = 1;
pid_diff[1][0] = 1;
temporal_idx[2] = 1;
temporal_up_switch[2] = true;
num_ref_pics[2] = 1;
pid_diff[2][0] = 2;
temporal_idx[3] = 2;
temporal_up_switch[3] = false;
num_ref_pics[3] = 2;
pid_diff[3][0] = 1;
pid_diff[3][1] = 2;
break;
default:
assert(false);
}
}
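// Illustrative reading of the kTemporalStructureMode3 case above (an
// editorial sketch, not part of the original header): the GOF describes the
// four-frame temporal pattern T0 T2 T1 T2, where frame 0 (T0) references the
// previous T0 frame 4 pictures back, frame 1 (T2) references frame 0 (1 back),
// frame 2 (T1) references frame 0 (2 back), and frame 3 (T2) references
// frames 2 and 1 (P_DIFF 1 and 2).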
void CopyGofInfoVP9(const GofInfoVP9& src) {
num_frames_in_gof = src.num_frames_in_gof;
for (size_t i = 0; i < num_frames_in_gof; ++i) {
temporal_idx[i] = src.temporal_idx[i];
temporal_up_switch[i] = src.temporal_up_switch[i];
num_ref_pics[i] = src.num_ref_pics[i];
for (uint8_t r = 0; r < num_ref_pics[i]; ++r) {
pid_diff[i][r] = src.pid_diff[i][r];
}
}
}
size_t num_frames_in_gof;
uint8_t temporal_idx[kMaxVp9FramesInGof];
bool temporal_up_switch[kMaxVp9FramesInGof];
uint8_t num_ref_pics[kMaxVp9FramesInGof];
uint8_t pid_diff[kMaxVp9FramesInGof][kMaxVp9RefPics];
};
struct RTPVideoHeaderVP9 {
void InitRTPVideoHeaderVP9() {
inter_pic_predicted = false;
flexible_mode = false;
beginning_of_frame = false;
end_of_frame = false;
ss_data_available = false;
picture_id = kNoPictureId;
max_picture_id = kMaxTwoBytePictureId;
tl0_pic_idx = kNoTl0PicIdx;
temporal_idx = kNoTemporalIdx;
spatial_idx = kNoSpatialIdx;
temporal_up_switch = false;
inter_layer_predicted = false;
gof_idx = kNoGofIdx;
num_ref_pics = 0;
num_spatial_layers = 1;
}
bool inter_pic_predicted; // This layer frame is dependent on previously
// coded frame(s).
bool flexible_mode; // This frame is in flexible mode.
bool beginning_of_frame; // True if this packet is the first in a VP9 layer
// frame.
bool end_of_frame; // True if this packet is the last in a VP9 layer frame.
bool ss_data_available; // True if SS data is available in this payload
// descriptor.
int16_t picture_id; // PictureID index, 15 bits;
// kNoPictureId if PictureID does not exist.
int16_t max_picture_id; // Maximum picture ID index; either 0x7F or 0x7FFF;
int16_t tl0_pic_idx; // TL0PIC_IDX, 8 bits;
// kNoTl0PicIdx means no value provided.
uint8_t temporal_idx; // Temporal layer index, or kNoTemporalIdx.
uint8_t spatial_idx; // Spatial layer index, or kNoSpatialIdx.
bool temporal_up_switch; // True if upswitch to higher frame rate is possible
// starting from this frame.
bool inter_layer_predicted; // Frame is dependent on directly lower spatial
// layer frame.
uint8_t gof_idx; // Index to predefined temporal frame info in SS data.
uint8_t num_ref_pics; // Number of reference pictures used by this layer
// frame.
uint8_t pid_diff[kMaxVp9RefPics]; // P_DIFF signaled to derive the PictureID
// of the reference pictures.
int16_t ref_picture_id[kMaxVp9RefPics]; // PictureID of reference pictures.
// SS data.
size_t num_spatial_layers; // Always populated.
bool spatial_layer_resolution_present;
uint16_t width[kMaxVp9NumberOfSpatialLayers];
uint16_t height[kMaxVp9NumberOfSpatialLayers];
GofInfoVP9 gof;
};
#if WEBRTC_48_H264_IMPL
// The packetization types that we support: single, aggregated, and fragmented.
enum H264PacketizationTypes {
kH264SingleNalu, // This packet contains a single NAL unit.
kH264StapA, // This packet contains STAP-A (single time
// aggregation) packets. If this packet has an
// associated NAL unit type, it'll be for the
// first such aggregated packet.
kH264FuA, // This packet contains a FU-A (fragmentation
// unit) packet, meaning it is a part of a frame
// that was too large to fit into a single packet.
};
struct RTPVideoHeaderH264 {
uint8_t nalu_type; // The NAL unit type. If this is a header for a
// fragmented packet, it's the NAL unit type of
// the original data. If this is the header for an
// aggregated packet, it's the NAL unit type of
// the first NAL unit in the packet.
H264PacketizationTypes packetization_type;
};
#else
// Mozilla's OpenH264 implementation
struct RTPVideoHeaderH264 {
bool stap_a;
bool single_nalu;
};
// XXX fix vp9 (bug 1138629)
struct RTPVideoHeaderVP9 {
void InitRTPVideoHeaderVP9() {
nonReference = false;
pictureId = kNoPictureId;
tl0PicIdx = kNoTl0PicIdx;
temporalIdx = kNoTemporalIdx;
layerSync = false;
keyIdx = kNoKeyIdx;
partitionId = 0;
beginningOfPartition = false;
}
bool nonReference; // Frame is discardable.
int16_t pictureId; // Picture ID index, 15 bits;
// kNoPictureId if PictureID does not exist.
int16_t tl0PicIdx; // TL0PIC_IDX, 8 bits;
// kNoTl0PicIdx means no value provided.
uint8_t temporalIdx; // Temporal layer index, or kNoTemporalIdx.
bool layerSync; // This frame is a layer sync frame.
// Disabled if temporalIdx == kNoTemporalIdx.
int keyIdx; // 5 bits; kNoKeyIdx means not used.
int partitionId; // VP9 partition ID
bool beginningOfPartition; // True if this packet is the first
// in a VP9 partition. Otherwise false
};
#endif
union RTPVideoTypeHeader {
RTPVideoHeaderVP8 VP8;
@@ -611,6 +747,18 @@ inline AudioFrame& AudioFrame::Append(const AudioFrame& rhs) {
return *this;
}
namespace {
inline int16_t ClampToInt16(int32_t input) {
if (input < -0x00008000) {
return -0x8000;
} else if (input > 0x00007FFF) {
return 0x7FFF;
} else {
return static_cast<int16_t>(input);
}
}
}
inline AudioFrame& AudioFrame::operator+=(const AudioFrame& rhs) {
// Sanity check
assert((num_channels_ > 0) && (num_channels_ < 3));
@@ -643,15 +791,9 @@ inline AudioFrame& AudioFrame::operator+=(const AudioFrame& rhs) {
} else {
// IMPROVEMENT this can be done very fast in assembly
for (int i = 0; i < samples_per_channel_ * num_channels_; i++) {
int32_t wrapGuard =
int32_t wrap_guard =
static_cast<int32_t>(data_[i]) + static_cast<int32_t>(rhs.data_[i]);
if (wrapGuard < -32768) {
data_[i] = -32768;
} else if (wrapGuard > 32767) {
data_[i] = 32767;
} else {
data_[i] = (int16_t)wrapGuard;
}
data_[i] = ClampToInt16(wrap_guard);
}
}
energy_ = 0xffffffff;
@@ -674,15 +816,9 @@ inline AudioFrame& AudioFrame::operator-=(const AudioFrame& rhs) {
speech_type_ = kUndefined;
for (int i = 0; i < samples_per_channel_ * num_channels_; i++) {
int32_t wrapGuard =
int32_t wrap_guard =
static_cast<int32_t>(data_[i]) - static_cast<int32_t>(rhs.data_[i]);
if (wrapGuard < -32768) {
data_[i] = -32768;
} else if (wrapGuard > 32767) {
data_[i] = 32767;
} else {
data_[i] = (int16_t)wrapGuard;
}
data_[i] = ClampToInt16(wrap_guard);
}
energy_ = 0xffffffff;
return *this;
@@ -690,11 +826,24 @@ inline AudioFrame& AudioFrame::operator-=(const AudioFrame& rhs) {
inline bool IsNewerSequenceNumber(uint16_t sequence_number,
uint16_t prev_sequence_number) {
// Distinguish between elements that are exactly 0x8000 apart.
// If s1>s2 and |s1-s2| = 0x8000: IsNewer(s1,s2)=true, IsNewer(s2,s1)=false
// rather than having IsNewer(s1,s2) = IsNewer(s2,s1) = false.
if (static_cast<uint16_t>(sequence_number - prev_sequence_number) == 0x8000) {
return sequence_number > prev_sequence_number;
}
return sequence_number != prev_sequence_number &&
static_cast<uint16_t>(sequence_number - prev_sequence_number) < 0x8000;
}
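// Example (editorial illustration): IsNewerSequenceNumber(2, 0xFFFE) is true,
// since 2 - 0xFFFE wraps to 4 (< 0x8000), i.e. a forward wrap-around, while
// IsNewerSequenceNumber(0xFFFE, 2) is false.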
inline bool IsNewerTimestamp(uint32_t timestamp, uint32_t prev_timestamp) {
// Distinguish between elements that are exactly 0x80000000 apart.
// If t1>t2 and |t1-t2| = 0x80000000: IsNewer(t1,t2)=true,
// IsNewer(t2,t1)=false
// rather than having IsNewer(t1,t2) = IsNewer(t2,t1) = false.
if (static_cast<uint32_t>(timestamp - prev_timestamp) == 0x80000000) {
return timestamp > prev_timestamp;
}
return timestamp != prev_timestamp &&
static_cast<uint32_t>(timestamp - prev_timestamp) < 0x80000000;
}
@@ -715,6 +864,46 @@ inline uint32_t LatestTimestamp(uint32_t timestamp1, uint32_t timestamp2) {
return IsNewerTimestamp(timestamp1, timestamp2) ? timestamp1 : timestamp2;
}
// Utility class to unwrap a sequence number to a larger type, for easier
// handling of large ranges. Note that sequence numbers will never be unwrapped
// to a negative value.
class SequenceNumberUnwrapper {
public:
SequenceNumberUnwrapper() : last_seq_(-1) {}
// Get the unwrapped sequence, but don't update the internal state.
int64_t UnwrapWithoutUpdate(uint16_t sequence_number) {
if (last_seq_ == -1)
return sequence_number;
uint16_t cropped_last = static_cast<uint16_t>(last_seq_);
int64_t delta = sequence_number - cropped_last;
if (IsNewerSequenceNumber(sequence_number, cropped_last)) {
if (delta < 0)
delta += (1 << 16); // Wrap forwards.
} else if (delta > 0 && (last_seq_ + delta - (1 << 16)) >= 0) {
// If sequence_number is older but delta is positive, this is a backwards
// wrap-around. However, don't wrap backwards past 0 (unwrapped).
delta -= (1 << 16);
}
return last_seq_ + delta;
}
// Only update the internal state to the specified last (unwrapped) sequence.
void UpdateLast(int64_t last_sequence) { last_seq_ = last_sequence; }
// Unwrap the sequence number and update the internal state.
int64_t Unwrap(uint16_t sequence_number) {
int64_t unwrapped = UnwrapWithoutUpdate(sequence_number);
UpdateLast(unwrapped);
return unwrapped;
}
private:
int64_t last_seq_;
};
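// Usage sketch (editorial illustration, not part of the original header); the
// sequence numbers below are hypothetical:
//
//   SequenceNumberUnwrapper unwrapper;
//   int64_t a = unwrapper.Unwrap(65534);  // 65534
//   int64_t b = unwrapper.Unwrap(65535);  // 65535
//   int64_t c = unwrapper.Unwrap(0);      // 65536 (wrapped forwards)
//   int64_t d = unwrapper.Unwrap(1);      // 65537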
} // namespace webrtc
#endif // MODULE_COMMON_TYPES_H

View File

@@ -13,6 +13,7 @@
#include "webrtc/modules/rtp_rtcp/source/rtp_format_h264.h"
#include "webrtc/modules/rtp_rtcp/source/rtp_format_video_generic.h"
#include "webrtc/modules/rtp_rtcp/source/rtp_format_vp8.h"
#include "webrtc/modules/rtp_rtcp/source/rtp_format_vp9.h"
namespace webrtc {
RtpPacketizer* RtpPacketizer::Create(RtpVideoCodecTypes type,
@@ -26,6 +27,8 @@ RtpPacketizer* RtpPacketizer::Create(RtpVideoCodecTypes type,
assert(rtp_type_header != NULL);
return new RtpPacketizerVp8(rtp_type_header->VP8, max_payload_len);
case kRtpVideoVp9:
assert(rtp_type_header != NULL);
return new RtpPacketizerVp9(rtp_type_header->VP9, max_payload_len);
case kRtpVideoGeneric:
return new RtpPacketizerGeneric(frame_type, max_payload_len);
case kRtpVideoNone:
@@ -40,7 +43,8 @@ RtpDepacketizer* RtpDepacketizer::Create(RtpVideoCodecTypes type) {
return new RtpDepacketizerH264();
case kRtpVideoVp8:
return new RtpDepacketizerVp8();
case kRtpVideoVp9: // XXX fix vp9 packetization (bug 1138629)
case kRtpVideoVp9:
return new RtpDepacketizerVp9();
case kRtpVideoGeneric:
return new RtpDepacketizerGeneric();
case kRtpVideoNone:

View File

@@ -0,0 +1,743 @@
/*
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/rtp_rtcp/source/rtp_format_vp9.h"
#include <assert.h>
#include <string.h>
#include <cmath>
#include "webrtc/base/bitbuffer.h"
#include "webrtc/base/checks.h"
#include "webrtc/base/logging.h"
#define RETURN_FALSE_ON_ERROR(x) \
if (!(x)) { \
return false; \
}
namespace webrtc {
namespace {
// Length of VP9 payload descriptors' fixed part.
const size_t kFixedPayloadDescriptorBytes = 1;
// Packet fragmentation mode. If true, packets are split into (almost) equal
// sizes. Otherwise, as many bytes as possible are packed into one packet.
const bool kBalancedMode = true;
const uint32_t kReservedBitValue0 = 0;
uint8_t TemporalIdxField(const RTPVideoHeaderVP9& hdr, uint8_t def) {
return (hdr.temporal_idx == kNoTemporalIdx) ? def : hdr.temporal_idx;
}
uint8_t SpatialIdxField(const RTPVideoHeaderVP9& hdr, uint8_t def) {
return (hdr.spatial_idx == kNoSpatialIdx) ? def : hdr.spatial_idx;
}
int16_t Tl0PicIdxField(const RTPVideoHeaderVP9& hdr, uint8_t def) {
return (hdr.tl0_pic_idx == kNoTl0PicIdx) ? def : hdr.tl0_pic_idx;
}
// Picture ID:
//
// +-+-+-+-+-+-+-+-+
// I: |M| PICTURE ID | M:0 => picture id is 7 bits.
// +-+-+-+-+-+-+-+-+ M:1 => picture id is 15 bits.
// M: | EXTENDED PID |
// +-+-+-+-+-+-+-+-+
//
size_t PictureIdLength(const RTPVideoHeaderVP9& hdr) {
if (hdr.picture_id == kNoPictureId)
return 0;
return (hdr.max_picture_id == kMaxOneBytePictureId) ? 1 : 2;
}
bool PictureIdPresent(const RTPVideoHeaderVP9& hdr) {
return PictureIdLength(hdr) > 0;
}
// Layer indices:
//
// Flexible mode (F=1): Non-flexible mode (F=0):
//
// +-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+
// L: | T |U| S |D| | T |U| S |D|
// +-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+
// | TL0PICIDX |
// +-+-+-+-+-+-+-+-+
//
size_t LayerInfoLength(const RTPVideoHeaderVP9& hdr) {
if (hdr.temporal_idx == kNoTemporalIdx &&
hdr.spatial_idx == kNoSpatialIdx) {
return 0;
}
return hdr.flexible_mode ? 1 : 2;
}
bool LayerInfoPresent(const RTPVideoHeaderVP9& hdr) {
return LayerInfoLength(hdr) > 0;
}
// Reference indices:
//
// +-+-+-+-+-+-+-+-+ P=1,F=1: At least one reference index
// P,F: | P_DIFF |N| up to 3 times has to be specified.
// +-+-+-+-+-+-+-+-+ N=1: An additional P_DIFF follows
// current P_DIFF.
//
size_t RefIndicesLength(const RTPVideoHeaderVP9& hdr) {
if (!hdr.inter_pic_predicted || !hdr.flexible_mode)
return 0;
RTC_DCHECK_GT(hdr.num_ref_pics, 0U);
RTC_DCHECK_LE(hdr.num_ref_pics, kMaxVp9RefPics);
return hdr.num_ref_pics;
}
// Scalability structure (SS).
//
// +-+-+-+-+-+-+-+-+
// V: | N_S |Y|G|-|-|-|
// +-+-+-+-+-+-+-+-+ -|
// Y: | WIDTH | (OPTIONAL) .
// + + .
// | | (OPTIONAL) .
// +-+-+-+-+-+-+-+-+ . N_S + 1 times
// | HEIGHT | (OPTIONAL) .
// + + .
// | | (OPTIONAL) .
// +-+-+-+-+-+-+-+-+ -|
// G: | N_G | (OPTIONAL)
// +-+-+-+-+-+-+-+-+ -|
// N_G: | T |U| R |-|-| (OPTIONAL) .
// +-+-+-+-+-+-+-+-+ -| . N_G times
// | P_DIFF | (OPTIONAL) . R times .
// +-+-+-+-+-+-+-+-+ -| -|
//
size_t SsDataLength(const RTPVideoHeaderVP9& hdr) {
if (!hdr.ss_data_available)
return 0;
RTC_DCHECK_GT(hdr.num_spatial_layers, 0U);
RTC_DCHECK_LE(hdr.num_spatial_layers, kMaxVp9NumberOfSpatialLayers);
RTC_DCHECK_LE(hdr.gof.num_frames_in_gof, kMaxVp9FramesInGof);
size_t length = 1; // V
if (hdr.spatial_layer_resolution_present) {
length += 4 * hdr.num_spatial_layers; // Y
}
if (hdr.gof.num_frames_in_gof > 0) {
++length; // G
}
// N_G
length += hdr.gof.num_frames_in_gof; // T, U, R
for (size_t i = 0; i < hdr.gof.num_frames_in_gof; ++i) {
RTC_DCHECK_LE(hdr.gof.num_ref_pics[i], kMaxVp9RefPics);
length += hdr.gof.num_ref_pics[i]; // R times
}
return length;
}
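// Worked example (editorial, mirrors the TestSsData unit test): with two
// spatial layers, resolutions present and a three-frame GOF using 0, 3 and 2
// reference pictures, the SS data takes 1 (V) + 2*4 (Y) + 1 (G) + 3 (N_G) +
// 5 (P_DIFF) = 18 bytes, i.e. a 19 byte descriptor including the fixed byte.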
size_t PayloadDescriptorLengthMinusSsData(const RTPVideoHeaderVP9& hdr) {
return kFixedPayloadDescriptorBytes + PictureIdLength(hdr) +
LayerInfoLength(hdr) + RefIndicesLength(hdr);
}
size_t PayloadDescriptorLength(const RTPVideoHeaderVP9& hdr) {
return PayloadDescriptorLengthMinusSsData(hdr) + SsDataLength(hdr);
}
void QueuePacket(size_t start_pos,
size_t size,
bool layer_begin,
bool layer_end,
RtpPacketizerVp9::PacketInfoQueue* packets) {
RtpPacketizerVp9::PacketInfo packet_info;
packet_info.payload_start_pos = start_pos;
packet_info.size = size;
packet_info.layer_begin = layer_begin;
packet_info.layer_end = layer_end;
packets->push(packet_info);
}
// Picture ID:
//
// +-+-+-+-+-+-+-+-+
// I: |M| PICTURE ID | M:0 => picture id is 7 bits.
// +-+-+-+-+-+-+-+-+ M:1 => picture id is 15 bits.
// M: | EXTENDED PID |
// +-+-+-+-+-+-+-+-+
//
bool WritePictureId(const RTPVideoHeaderVP9& vp9,
rtc::BitBufferWriter* writer) {
bool m_bit = (PictureIdLength(vp9) == 2);
RETURN_FALSE_ON_ERROR(writer->WriteBits(m_bit ? 1 : 0, 1));
RETURN_FALSE_ON_ERROR(writer->WriteBits(vp9.picture_id, m_bit ? 15 : 7));
return true;
}
// Layer indices:
//
// Flexible mode (F=1):
//
// +-+-+-+-+-+-+-+-+
// L: | T |U| S |D|
// +-+-+-+-+-+-+-+-+
//
bool WriteLayerInfoCommon(const RTPVideoHeaderVP9& vp9,
rtc::BitBufferWriter* writer) {
RETURN_FALSE_ON_ERROR(writer->WriteBits(TemporalIdxField(vp9, 0), 3));
RETURN_FALSE_ON_ERROR(writer->WriteBits(vp9.temporal_up_switch ? 1 : 0, 1));
RETURN_FALSE_ON_ERROR(writer->WriteBits(SpatialIdxField(vp9, 0), 3));
RETURN_FALSE_ON_ERROR(writer->WriteBits(vp9.inter_layer_predicted ? 1 : 0, 1));
return true;
}
// Non-flexible mode (F=0):
//
// +-+-+-+-+-+-+-+-+
// L: | T |U| S |D|
// +-+-+-+-+-+-+-+-+
// | TL0PICIDX |
// +-+-+-+-+-+-+-+-+
//
bool WriteLayerInfoNonFlexibleMode(const RTPVideoHeaderVP9& vp9,
rtc::BitBufferWriter* writer) {
RETURN_FALSE_ON_ERROR(writer->WriteUInt8(Tl0PicIdxField(vp9, 0)));
return true;
}
bool WriteLayerInfo(const RTPVideoHeaderVP9& vp9,
rtc::BitBufferWriter* writer) {
if (!WriteLayerInfoCommon(vp9, writer))
return false;
if (vp9.flexible_mode)
return true;
return WriteLayerInfoNonFlexibleMode(vp9, writer);
}
// Reference indices:
//
// +-+-+-+-+-+-+-+-+ P=1,F=1: At least one reference index
// P,F: | P_DIFF |N| up to 3 times has to be specified.
// +-+-+-+-+-+-+-+-+ N=1: An additional P_DIFF follows
// current P_DIFF.
//
bool WriteRefIndices(const RTPVideoHeaderVP9& vp9,
rtc::BitBufferWriter* writer) {
if (!PictureIdPresent(vp9) ||
vp9.num_ref_pics == 0 || vp9.num_ref_pics > kMaxVp9RefPics) {
return false;
}
for (uint8_t i = 0; i < vp9.num_ref_pics; ++i) {
bool n_bit = !(i == vp9.num_ref_pics - 1);
RETURN_FALSE_ON_ERROR(writer->WriteBits(vp9.pid_diff[i], 7));
RETURN_FALSE_ON_ERROR(writer->WriteBits(n_bit ? 1 : 0, 1));
}
return true;
}
// Scalability structure (SS).
//
// +-+-+-+-+-+-+-+-+
// V: | N_S |Y|G|-|-|-|
// +-+-+-+-+-+-+-+-+ -|
// Y: | WIDTH | (OPTIONAL) .
// + + .
// | | (OPTIONAL) .
// +-+-+-+-+-+-+-+-+ . N_S + 1 times
// | HEIGHT | (OPTIONAL) .
// + + .
// | | (OPTIONAL) .
// +-+-+-+-+-+-+-+-+ -|
// G: | N_G | (OPTIONAL)
// +-+-+-+-+-+-+-+-+ -|
// N_G: | T |U| R |-|-| (OPTIONAL) .
// +-+-+-+-+-+-+-+-+ -| . N_G times
// | P_DIFF | (OPTIONAL) . R times .
// +-+-+-+-+-+-+-+-+ -| -|
//
bool WriteSsData(const RTPVideoHeaderVP9& vp9, rtc::BitBufferWriter* writer) {
RTC_DCHECK_GT(vp9.num_spatial_layers, 0U);
RTC_DCHECK_LE(vp9.num_spatial_layers, kMaxVp9NumberOfSpatialLayers);
RTC_DCHECK_LE(vp9.gof.num_frames_in_gof, kMaxVp9FramesInGof);
bool g_bit = vp9.gof.num_frames_in_gof > 0;
RETURN_FALSE_ON_ERROR(writer->WriteBits(vp9.num_spatial_layers - 1, 3));
RETURN_FALSE_ON_ERROR(
writer->WriteBits(vp9.spatial_layer_resolution_present ? 1 : 0, 1));
RETURN_FALSE_ON_ERROR(writer->WriteBits(g_bit ? 1 : 0, 1)); // G
RETURN_FALSE_ON_ERROR(writer->WriteBits(kReservedBitValue0, 3));
if (vp9.spatial_layer_resolution_present) {
for (size_t i = 0; i < vp9.num_spatial_layers; ++i) {
RETURN_FALSE_ON_ERROR(writer->WriteUInt16(vp9.width[i]));
RETURN_FALSE_ON_ERROR(writer->WriteUInt16(vp9.height[i]));
}
}
if (g_bit) {
RETURN_FALSE_ON_ERROR(writer->WriteUInt8(vp9.gof.num_frames_in_gof));
}
for (size_t i = 0; i < vp9.gof.num_frames_in_gof; ++i) {
RETURN_FALSE_ON_ERROR(writer->WriteBits(vp9.gof.temporal_idx[i], 3));
RETURN_FALSE_ON_ERROR(
writer->WriteBits(vp9.gof.temporal_up_switch[i] ? 1 : 0, 1));
RETURN_FALSE_ON_ERROR(writer->WriteBits(vp9.gof.num_ref_pics[i], 2));
RETURN_FALSE_ON_ERROR(writer->WriteBits(kReservedBitValue0, 2));
for (uint8_t r = 0; r < vp9.gof.num_ref_pics[i]; ++r) {
RETURN_FALSE_ON_ERROR(writer->WriteUInt8(vp9.gof.pid_diff[i][r]));
}
}
return true;
}
// Picture ID:
//
// +-+-+-+-+-+-+-+-+
// I: |M| PICTURE ID | M:0 => picture id is 7 bits.
// +-+-+-+-+-+-+-+-+ M:1 => picture id is 15 bits.
// M: | EXTENDED PID |
// +-+-+-+-+-+-+-+-+
//
bool ParsePictureId(rtc::BitBuffer* parser, RTPVideoHeaderVP9* vp9) {
uint32_t picture_id;
uint32_t m_bit;
RETURN_FALSE_ON_ERROR(parser->ReadBits(&m_bit, 1));
if (m_bit) {
RETURN_FALSE_ON_ERROR(parser->ReadBits(&picture_id, 15));
vp9->max_picture_id = kMaxTwoBytePictureId;
} else {
RETURN_FALSE_ON_ERROR(parser->ReadBits(&picture_id, 7));
vp9->max_picture_id = kMaxOneBytePictureId;
}
vp9->picture_id = picture_id;
return true;
}
// Layer indices (flexible mode):
//
// +-+-+-+-+-+-+-+-+
// L: | T |U| S |D|
// +-+-+-+-+-+-+-+-+
//
bool ParseLayerInfoCommon(rtc::BitBuffer* parser, RTPVideoHeaderVP9* vp9) {
uint32_t t, u_bit, s, d_bit;
RETURN_FALSE_ON_ERROR(parser->ReadBits(&t, 3));
RETURN_FALSE_ON_ERROR(parser->ReadBits(&u_bit, 1));
RETURN_FALSE_ON_ERROR(parser->ReadBits(&s, 3));
RETURN_FALSE_ON_ERROR(parser->ReadBits(&d_bit, 1));
vp9->temporal_idx = t;
vp9->temporal_up_switch = u_bit ? true : false;
vp9->spatial_idx = s;
vp9->inter_layer_predicted = d_bit ? true : false;
return true;
}
// Layer indices (non-flexible mode):
//
// +-+-+-+-+-+-+-+-+
// L: | T |U| S |D|
// +-+-+-+-+-+-+-+-+
// | TL0PICIDX |
// +-+-+-+-+-+-+-+-+
//
bool ParseLayerInfoNonFlexibleMode(rtc::BitBuffer* parser,
RTPVideoHeaderVP9* vp9) {
uint8_t tl0picidx;
RETURN_FALSE_ON_ERROR(parser->ReadUInt8(&tl0picidx));
vp9->tl0_pic_idx = tl0picidx;
return true;
}
bool ParseLayerInfo(rtc::BitBuffer* parser, RTPVideoHeaderVP9* vp9) {
if (!ParseLayerInfoCommon(parser, vp9))
return false;
if (vp9->flexible_mode)
return true;
return ParseLayerInfoNonFlexibleMode(parser, vp9);
}
// Reference indices:
//
// +-+-+-+-+-+-+-+-+ P=1,F=1: At least one reference index
// P,F: | P_DIFF |N| up to 3 times has to be specified.
// +-+-+-+-+-+-+-+-+ N=1: An additional P_DIFF follows
// current P_DIFF.
//
bool ParseRefIndices(rtc::BitBuffer* parser, RTPVideoHeaderVP9* vp9) {
if (vp9->picture_id == kNoPictureId)
return false;
vp9->num_ref_pics = 0;
uint32_t n_bit;
do {
if (vp9->num_ref_pics == kMaxVp9RefPics)
return false;
uint32_t p_diff;
RETURN_FALSE_ON_ERROR(parser->ReadBits(&p_diff, 7));
RETURN_FALSE_ON_ERROR(parser->ReadBits(&n_bit, 1));
vp9->pid_diff[vp9->num_ref_pics] = p_diff;
uint32_t scaled_pid = vp9->picture_id;
if (p_diff > scaled_pid) {
// TODO(asapersson): Max should correspond to the picture id of last wrap.
scaled_pid += vp9->max_picture_id + 1;
}
vp9->ref_picture_id[vp9->num_ref_pics++] = scaled_pid - p_diff;
} while (n_bit);
return true;
}
// Scalability structure (SS).
//
// +-+-+-+-+-+-+-+-+
// V: | N_S |Y|G|-|-|-|
// +-+-+-+-+-+-+-+-+ -|
// Y: | WIDTH | (OPTIONAL) .
// + + .
// | | (OPTIONAL) .
// +-+-+-+-+-+-+-+-+ . N_S + 1 times
// | HEIGHT | (OPTIONAL) .
// + + .
// | | (OPTIONAL) .
// +-+-+-+-+-+-+-+-+ -|
// G: | N_G | (OPTIONAL)
// +-+-+-+-+-+-+-+-+ -|
// N_G: | T |U| R |-|-| (OPTIONAL) .
// +-+-+-+-+-+-+-+-+ -| . N_G times
// | P_DIFF | (OPTIONAL) . R times .
// +-+-+-+-+-+-+-+-+ -| -|
//
bool ParseSsData(rtc::BitBuffer* parser, RTPVideoHeaderVP9* vp9) {
uint32_t n_s, y_bit, g_bit;
RETURN_FALSE_ON_ERROR(parser->ReadBits(&n_s, 3));
RETURN_FALSE_ON_ERROR(parser->ReadBits(&y_bit, 1));
RETURN_FALSE_ON_ERROR(parser->ReadBits(&g_bit, 1));
RETURN_FALSE_ON_ERROR(parser->ConsumeBits(3));
vp9->num_spatial_layers = n_s + 1;
vp9->spatial_layer_resolution_present = y_bit ? true : false;
vp9->gof.num_frames_in_gof = 0;
if (y_bit) {
for (size_t i = 0; i < vp9->num_spatial_layers; ++i) {
RETURN_FALSE_ON_ERROR(parser->ReadUInt16(&vp9->width[i]));
RETURN_FALSE_ON_ERROR(parser->ReadUInt16(&vp9->height[i]));
}
}
if (g_bit) {
uint8_t n_g;
RETURN_FALSE_ON_ERROR(parser->ReadUInt8(&n_g));
vp9->gof.num_frames_in_gof = n_g;
}
for (size_t i = 0; i < vp9->gof.num_frames_in_gof; ++i) {
uint32_t t, u_bit, r;
RETURN_FALSE_ON_ERROR(parser->ReadBits(&t, 3));
RETURN_FALSE_ON_ERROR(parser->ReadBits(&u_bit, 1));
RETURN_FALSE_ON_ERROR(parser->ReadBits(&r, 2));
RETURN_FALSE_ON_ERROR(parser->ConsumeBits(2));
vp9->gof.temporal_idx[i] = t;
vp9->gof.temporal_up_switch[i] = u_bit ? true : false;
vp9->gof.num_ref_pics[i] = r;
for (uint8_t p = 0; p < vp9->gof.num_ref_pics[i]; ++p) {
uint8_t p_diff;
RETURN_FALSE_ON_ERROR(parser->ReadUInt8(&p_diff));
vp9->gof.pid_diff[i][p] = p_diff;
}
}
return true;
}
// Gets the size of next payload chunk to send. Returns 0 on error.
size_t CalcNextSize(size_t max_length, size_t rem_bytes) {
if (max_length == 0 || rem_bytes == 0) {
return 0;
}
if (kBalancedMode) {
size_t num_frags = std::ceil(static_cast<double>(rem_bytes) / max_length);
return static_cast<size_t>(
static_cast<double>(rem_bytes) / num_frags + 0.5);
}
return max_length >= rem_bytes ? rem_bytes : max_length;
}
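// Worked example (editorial, matches the TestEqualSizedMode_TwoPackets unit
// test): a 27 byte frame with a 27 byte max packet size leaves max_length = 26
// after the 1 byte descriptor. First call: rem_bytes = 27, num_frags =
// ceil(27/26) = 2, chunk = round(27/2) = 14, giving a 15 byte packet.
// Second call: rem_bytes = 13, num_frags = 1, chunk = 13, giving 14 bytes.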
} // namespace
RtpPacketizerVp9::RtpPacketizerVp9(const RTPVideoHeaderVP9& hdr,
size_t max_payload_length)
: hdr_(hdr),
max_payload_length_(max_payload_length),
payload_(nullptr),
payload_size_(0) {
}
RtpPacketizerVp9::~RtpPacketizerVp9() {
}
ProtectionType RtpPacketizerVp9::GetProtectionType() {
bool protect =
hdr_.temporal_idx == 0 || hdr_.temporal_idx == kNoTemporalIdx;
return protect ? kProtectedPacket : kUnprotectedPacket;
}
StorageType RtpPacketizerVp9::GetStorageType(uint32_t retransmission_settings) {
StorageType storage = kAllowRetransmission;
if (hdr_.temporal_idx == 0 &&
!(retransmission_settings & kRetransmitBaseLayer)) {
storage = kDontRetransmit;
} else if (hdr_.temporal_idx != kNoTemporalIdx && hdr_.temporal_idx > 0 &&
!(retransmission_settings & kRetransmitHigherLayers)) {
storage = kDontRetransmit;
}
return storage;
}
std::string RtpPacketizerVp9::ToString() {
return "RtpPacketizerVp9";
}
void RtpPacketizerVp9::SetPayloadData(
const uint8_t* payload,
size_t payload_size,
const RTPFragmentationHeader* fragmentation) {
payload_ = payload;
payload_size_ = payload_size;
GeneratePackets();
}
void RtpPacketizerVp9::GeneratePackets() {
if (max_payload_length_ < PayloadDescriptorLength(hdr_) + 1) {
LOG(LS_ERROR) << "Payload header and one payload byte won't fit.";
return;
}
size_t bytes_processed = 0;
while (bytes_processed < payload_size_) {
size_t rem_bytes = payload_size_ - bytes_processed;
size_t rem_payload_len = max_payload_length_ -
(bytes_processed ? PayloadDescriptorLengthMinusSsData(hdr_)
: PayloadDescriptorLength(hdr_));
size_t packet_bytes = CalcNextSize(rem_payload_len, rem_bytes);
if (packet_bytes == 0) {
LOG(LS_ERROR) << "Failed to generate VP9 packets.";
while (!packets_.empty())
packets_.pop();
return;
}
QueuePacket(bytes_processed, packet_bytes, bytes_processed == 0,
rem_bytes == packet_bytes, &packets_);
bytes_processed += packet_bytes;
}
assert(bytes_processed == payload_size_);
}
bool RtpPacketizerVp9::NextPacket(uint8_t* buffer,
size_t* bytes_to_send,
bool* last_packet) {
if (packets_.empty()) {
return false;
}
PacketInfo packet_info = packets_.front();
packets_.pop();
if (!WriteHeaderAndPayload(packet_info, buffer, bytes_to_send)) {
return false;
}
*last_packet =
packets_.empty() && (hdr_.spatial_idx == kNoSpatialIdx ||
hdr_.spatial_idx == hdr_.num_spatial_layers - 1);
return true;
}
// VP9 format:
//
// Payload descriptor for F = 1 (flexible mode)
// 0 1 2 3 4 5 6 7
// +-+-+-+-+-+-+-+-+
// |I|P|L|F|B|E|V|-| (REQUIRED)
// +-+-+-+-+-+-+-+-+
// I: |M| PICTURE ID | (RECOMMENDED)
// +-+-+-+-+-+-+-+-+
// M: | EXTENDED PID | (RECOMMENDED)
// +-+-+-+-+-+-+-+-+
// L: | T |U| S |D| (CONDITIONALLY RECOMMENDED)
// +-+-+-+-+-+-+-+-+ -|
// P,F: | P_DIFF |N| (CONDITIONALLY RECOMMENDED) . up to 3 times
// +-+-+-+-+-+-+-+-+ -|
// V: | SS |
// | .. |
// +-+-+-+-+-+-+-+-+
//
// Payload descriptor for F = 0 (non-flexible mode)
// 0 1 2 3 4 5 6 7
// +-+-+-+-+-+-+-+-+
// |I|P|L|F|B|E|V|-| (REQUIRED)
// +-+-+-+-+-+-+-+-+
// I: |M| PICTURE ID | (RECOMMENDED)
// +-+-+-+-+-+-+-+-+
// M: | EXTENDED PID | (RECOMMENDED)
// +-+-+-+-+-+-+-+-+
// L: | T |U| S |D| (CONDITIONALLY RECOMMENDED)
// +-+-+-+-+-+-+-+-+
// | TL0PICIDX | (CONDITIONALLY REQUIRED)
// +-+-+-+-+-+-+-+-+
// V: | SS |
// | .. |
// +-+-+-+-+-+-+-+-+
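// Worked example (editorial): a frame sent in a single packet with no picture
// ID, layer info or SS data has only the required byte, with B = E = 1 and
// all other flags zero, i.e. 0000 1100 = 0x0C (cf. the ParseBasicHeader test).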
bool RtpPacketizerVp9::WriteHeaderAndPayload(const PacketInfo& packet_info,
uint8_t* buffer,
size_t* bytes_to_send) const {
size_t header_length;
if (!WriteHeader(packet_info, buffer, &header_length))
return false;
// Copy payload data.
memcpy(&buffer[header_length],
&payload_[packet_info.payload_start_pos], packet_info.size);
*bytes_to_send = header_length + packet_info.size;
return true;
}
bool RtpPacketizerVp9::WriteHeader(const PacketInfo& packet_info,
uint8_t* buffer,
size_t* header_length) const {
// Required payload descriptor byte.
bool i_bit = PictureIdPresent(hdr_);
bool p_bit = hdr_.inter_pic_predicted;
bool l_bit = LayerInfoPresent(hdr_);
bool f_bit = hdr_.flexible_mode;
bool b_bit = packet_info.layer_begin;
bool e_bit = packet_info.layer_end;
bool v_bit = hdr_.ss_data_available && b_bit;
rtc::BitBufferWriter writer(buffer, max_payload_length_);
RETURN_FALSE_ON_ERROR(writer.WriteBits(i_bit ? 1 : 0, 1));
RETURN_FALSE_ON_ERROR(writer.WriteBits(p_bit ? 1 : 0, 1));
RETURN_FALSE_ON_ERROR(writer.WriteBits(l_bit ? 1 : 0, 1));
RETURN_FALSE_ON_ERROR(writer.WriteBits(f_bit ? 1 : 0, 1));
RETURN_FALSE_ON_ERROR(writer.WriteBits(b_bit ? 1 : 0, 1));
RETURN_FALSE_ON_ERROR(writer.WriteBits(e_bit ? 1 : 0, 1));
RETURN_FALSE_ON_ERROR(writer.WriteBits(v_bit ? 1 : 0, 1));
RETURN_FALSE_ON_ERROR(writer.WriteBits(kReservedBitValue0, 1));
// Add fields that are present.
if (i_bit && !WritePictureId(hdr_, &writer)) {
LOG(LS_ERROR) << "Failed writing VP9 picture id.";
return false;
}
if (l_bit && !WriteLayerInfo(hdr_, &writer)) {
LOG(LS_ERROR) << "Failed writing VP9 layer info.";
return false;
}
if (p_bit && f_bit && !WriteRefIndices(hdr_, &writer)) {
LOG(LS_ERROR) << "Failed writing VP9 ref indices.";
return false;
}
if (v_bit && !WriteSsData(hdr_, &writer)) {
LOG(LS_ERROR) << "Failed writing VP9 SS data.";
return false;
}
size_t offset_bytes = 0;
size_t offset_bits = 0;
writer.GetCurrentOffset(&offset_bytes, &offset_bits);
assert(offset_bits == 0);
*header_length = offset_bytes;
return true;
}
bool RtpDepacketizerVp9::Parse(ParsedPayload* parsed_payload,
const uint8_t* payload,
size_t payload_length) {
assert(parsed_payload != nullptr);
if (payload_length == 0) {
LOG(LS_ERROR) << "Payload length is zero.";
return false;
}
// Parse mandatory first byte of payload descriptor.
rtc::BitBuffer parser(payload, payload_length);
uint32_t i_bit, p_bit, l_bit, f_bit, b_bit, e_bit, v_bit;
RETURN_FALSE_ON_ERROR(parser.ReadBits(&i_bit, 1));
RETURN_FALSE_ON_ERROR(parser.ReadBits(&p_bit, 1));
RETURN_FALSE_ON_ERROR(parser.ReadBits(&l_bit, 1));
RETURN_FALSE_ON_ERROR(parser.ReadBits(&f_bit, 1));
RETURN_FALSE_ON_ERROR(parser.ReadBits(&b_bit, 1));
RETURN_FALSE_ON_ERROR(parser.ReadBits(&e_bit, 1));
RETURN_FALSE_ON_ERROR(parser.ReadBits(&v_bit, 1));
RETURN_FALSE_ON_ERROR(parser.ConsumeBits(1));
// Parsed payload.
parsed_payload->type.Video.width = 0;
parsed_payload->type.Video.height = 0;
parsed_payload->type.Video.simulcastIdx = 0;
parsed_payload->type.Video.codec = kRtpVideoVp9;
parsed_payload->frame_type = p_bit ? kVideoFrameDelta : kVideoFrameKey;
RTPVideoHeaderVP9* vp9 = &parsed_payload->type.Video.codecHeader.VP9;
vp9->InitRTPVideoHeaderVP9();
vp9->inter_pic_predicted = p_bit ? true : false;
vp9->flexible_mode = f_bit ? true : false;
vp9->beginning_of_frame = b_bit ? true : false;
vp9->end_of_frame = e_bit ? true : false;
vp9->ss_data_available = v_bit ? true : false;
vp9->spatial_idx = 0;
// Parse fields that are present.
if (i_bit && !ParsePictureId(&parser, vp9)) {
LOG(LS_ERROR) << "Failed parsing VP9 picture id.";
return false;
}
if (l_bit && !ParseLayerInfo(&parser, vp9)) {
LOG(LS_ERROR) << "Failed parsing VP9 layer info.";
return false;
}
if (p_bit && f_bit && !ParseRefIndices(&parser, vp9)) {
LOG(LS_ERROR) << "Failed parsing VP9 ref indices.";
return false;
}
if (v_bit) {
if (!ParseSsData(&parser, vp9)) {
LOG(LS_ERROR) << "Failed parsing VP9 SS data.";
return false;
}
if (vp9->spatial_layer_resolution_present) {
// TODO(asapersson): Add support for spatial layers.
parsed_payload->type.Video.width = vp9->width[0];
parsed_payload->type.Video.height = vp9->height[0];
}
}
parsed_payload->type.Video.isFirstPacket =
b_bit && (!l_bit || !vp9->inter_layer_predicted);
uint64_t rem_bits = parser.RemainingBitCount();
assert(rem_bits % 8 == 0);
parsed_payload->payload_length = rem_bits / 8;
if (parsed_payload->payload_length == 0) {
LOG(LS_ERROR) << "Failed parsing VP9 payload data.";
return false;
}
parsed_payload->payload =
payload + payload_length - parsed_payload->payload_length;
return true;
}
} // namespace webrtc

View File

@@ -0,0 +1,108 @@
/*
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
//
// This file contains the declaration of the VP9 packetizer class.
// A packetizer object is created for each encoded video frame. The
// constructor is called with the RTP video header and the maximum payload
// length; the encoded frame is then passed in via SetPayloadData.
//
// After creating the packetizer, the method NextPacket is called
// repeatedly to get all packets for the frame. The method returns true as
// long as a packet was written, and signals the last packet of the frame
// through its |last_packet| output argument.
//
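// Usage sketch (editorial illustration, not part of the original header);
// encoded_frame, encoded_size and SendRtpPacket are hypothetical:
//
//   RTPVideoHeaderVP9 vp9_hdr;
//   vp9_hdr.InitRTPVideoHeaderVP9();
//   RtpPacketizerVp9 packetizer(vp9_hdr, 1200 /* max payload bytes */);
//   packetizer.SetPayloadData(encoded_frame, encoded_size, nullptr);
//   uint8_t packet[1200];
//   size_t length = 0;
//   bool last = false;
//   while (packetizer.NextPacket(packet, &length, &last)) {
//     SendRtpPacket(packet, length, last);
//   }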
#ifndef WEBRTC_MODULES_RTP_RTCP_SOURCE_RTP_FORMAT_VP9_H_
#define WEBRTC_MODULES_RTP_RTCP_SOURCE_RTP_FORMAT_VP9_H_
#include <queue>
#include <string>
#include "webrtc/base/constructormagic.h"
#include "webrtc/modules/include/module_common_types.h"
#include "webrtc/modules/rtp_rtcp/source/rtp_format.h"
#include "webrtc/typedefs.h"
namespace webrtc {
class RtpPacketizerVp9 : public RtpPacketizer {
public:
RtpPacketizerVp9(const RTPVideoHeaderVP9& hdr, size_t max_payload_length);
virtual ~RtpPacketizerVp9();
ProtectionType GetProtectionType() override;
StorageType GetStorageType(uint32_t retransmission_settings) override;
std::string ToString() override;
// The payload data must be one encoded VP9 frame.
void SetPayloadData(const uint8_t* payload,
size_t payload_size,
const RTPFragmentationHeader* fragmentation) override;
// Gets the next payload with VP9 payload header.
// |buffer| is a pointer to where the output will be written.
// |bytes_to_send| is an output variable that will contain number of bytes
// written to buffer.
// |last_packet| is true for the last packet of the frame, false otherwise
// (i.e. call the function again to get the next packet).
// Returns true on success, false otherwise.
bool NextPacket(uint8_t* buffer,
size_t* bytes_to_send,
bool* last_packet) override;
typedef struct {
size_t payload_start_pos;
size_t size;
bool layer_begin;
bool layer_end;
} PacketInfo;
typedef std::queue<PacketInfo> PacketInfoQueue;
private:
// Calculates all packet sizes and loads info to packet queue.
void GeneratePackets();
// Writes the payload descriptor header and copies payload to the |buffer|.
// |packet_info| determines which part of the payload to write.
// |bytes_to_send| is set to the number of bytes written to the buffer.
// Returns true on success, false otherwise.
bool WriteHeaderAndPayload(const PacketInfo& packet_info,
uint8_t* buffer,
size_t* bytes_to_send) const;
// Writes payload descriptor header to |buffer|.
// Returns true on success, false otherwise.
bool WriteHeader(const PacketInfo& packet_info,
uint8_t* buffer,
size_t* header_length) const;
const RTPVideoHeaderVP9 hdr_;
const size_t max_payload_length_; // The max length in bytes of one packet.
const uint8_t* payload_; // The payload data to be packetized.
size_t payload_size_; // The size in bytes of the payload data.
PacketInfoQueue packets_;
RTC_DISALLOW_COPY_AND_ASSIGN(RtpPacketizerVp9);
};
class RtpDepacketizerVp9 : public RtpDepacketizer {
public:
virtual ~RtpDepacketizerVp9() {}
bool Parse(ParsedPayload* parsed_payload,
const uint8_t* payload,
size_t payload_length) override;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_RTP_RTCP_SOURCE_RTP_FORMAT_VP9_H_

View File

@@ -0,0 +1,690 @@
/*
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <vector>
#include "testing/gmock/include/gmock/gmock.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "webrtc/modules/rtp_rtcp/source/rtp_format_vp9.h"
#include "webrtc/typedefs.h"
namespace webrtc {
namespace {
void VerifyHeader(const RTPVideoHeaderVP9& expected,
const RTPVideoHeaderVP9& actual) {
EXPECT_EQ(expected.inter_layer_predicted, actual.inter_layer_predicted);
EXPECT_EQ(expected.inter_pic_predicted, actual.inter_pic_predicted);
EXPECT_EQ(expected.flexible_mode, actual.flexible_mode);
EXPECT_EQ(expected.beginning_of_frame, actual.beginning_of_frame);
EXPECT_EQ(expected.end_of_frame, actual.end_of_frame);
EXPECT_EQ(expected.ss_data_available, actual.ss_data_available);
EXPECT_EQ(expected.picture_id, actual.picture_id);
EXPECT_EQ(expected.max_picture_id, actual.max_picture_id);
EXPECT_EQ(expected.temporal_idx, actual.temporal_idx);
EXPECT_EQ(expected.spatial_idx == kNoSpatialIdx ? 0 : expected.spatial_idx,
actual.spatial_idx);
EXPECT_EQ(expected.gof_idx, actual.gof_idx);
EXPECT_EQ(expected.tl0_pic_idx, actual.tl0_pic_idx);
EXPECT_EQ(expected.temporal_up_switch, actual.temporal_up_switch);
EXPECT_EQ(expected.num_ref_pics, actual.num_ref_pics);
for (uint8_t i = 0; i < expected.num_ref_pics; ++i) {
EXPECT_EQ(expected.pid_diff[i], actual.pid_diff[i]);
EXPECT_EQ(expected.ref_picture_id[i], actual.ref_picture_id[i]);
}
if (expected.ss_data_available) {
EXPECT_EQ(expected.spatial_layer_resolution_present,
actual.spatial_layer_resolution_present);
EXPECT_EQ(expected.num_spatial_layers, actual.num_spatial_layers);
if (expected.spatial_layer_resolution_present) {
for (size_t i = 0; i < expected.num_spatial_layers; i++) {
EXPECT_EQ(expected.width[i], actual.width[i]);
EXPECT_EQ(expected.height[i], actual.height[i]);
}
}
EXPECT_EQ(expected.gof.num_frames_in_gof, actual.gof.num_frames_in_gof);
for (size_t i = 0; i < expected.gof.num_frames_in_gof; i++) {
EXPECT_EQ(expected.gof.temporal_up_switch[i],
actual.gof.temporal_up_switch[i]);
EXPECT_EQ(expected.gof.temporal_idx[i], actual.gof.temporal_idx[i]);
EXPECT_EQ(expected.gof.num_ref_pics[i], actual.gof.num_ref_pics[i]);
for (uint8_t j = 0; j < expected.gof.num_ref_pics[i]; j++) {
EXPECT_EQ(expected.gof.pid_diff[i][j], actual.gof.pid_diff[i][j]);
}
}
}
}
void VerifyPayload(const RtpDepacketizer::ParsedPayload& parsed,
const uint8_t* payload,
size_t payload_length) {
EXPECT_EQ(payload, parsed.payload);
EXPECT_EQ(payload_length, parsed.payload_length);
EXPECT_THAT(std::vector<uint8_t>(parsed.payload,
parsed.payload + parsed.payload_length),
::testing::ElementsAreArray(payload, payload_length));
}
void ParseAndCheckPacket(const uint8_t* packet,
const RTPVideoHeaderVP9& expected,
size_t expected_hdr_length,
size_t expected_length) {
rtc::scoped_ptr<RtpDepacketizer> depacketizer(new RtpDepacketizerVp9());
RtpDepacketizer::ParsedPayload parsed;
ASSERT_TRUE(depacketizer->Parse(&parsed, packet, expected_length));
EXPECT_EQ(kRtpVideoVp9, parsed.type.Video.codec);
VerifyHeader(expected, parsed.type.Video.codecHeader.VP9);
const size_t kExpectedPayloadLength = expected_length - expected_hdr_length;
VerifyPayload(parsed, packet + expected_hdr_length, kExpectedPayloadLength);
}
} // namespace
// Payload descriptor for flexible mode
// 0 1 2 3 4 5 6 7
// +-+-+-+-+-+-+-+-+
// |I|P|L|F|B|E|V|-| (REQUIRED)
// +-+-+-+-+-+-+-+-+
// I: |M| PICTURE ID | (RECOMMENDED)
// +-+-+-+-+-+-+-+-+
// M: | EXTENDED PID | (RECOMMENDED)
// +-+-+-+-+-+-+-+-+
// L: | T |U| S |D| (CONDITIONALLY RECOMMENDED)
// +-+-+-+-+-+-+-+-+ -|
// P,F: | P_DIFF |N| (CONDITIONALLY RECOMMENDED) . up to 3 times
// +-+-+-+-+-+-+-+-+ -|
// V: | SS |
// | .. |
// +-+-+-+-+-+-+-+-+
//
// Payload descriptor for non-flexible mode
// 0 1 2 3 4 5 6 7
// +-+-+-+-+-+-+-+-+
// |I|P|L|F|B|E|V|-| (REQUIRED)
// +-+-+-+-+-+-+-+-+
// I: |M| PICTURE ID | (RECOMMENDED)
// +-+-+-+-+-+-+-+-+
// M: | EXTENDED PID | (RECOMMENDED)
// +-+-+-+-+-+-+-+-+
// L: | T |U| S |D| (CONDITIONALLY RECOMMENDED)
// +-+-+-+-+-+-+-+-+
// | TL0PICIDX | (CONDITIONALLY REQUIRED)
// +-+-+-+-+-+-+-+-+
// V: | SS |
// | .. |
// +-+-+-+-+-+-+-+-+
class RtpPacketizerVp9Test : public ::testing::Test {
protected:
RtpPacketizerVp9Test() {}
virtual void SetUp() {
expected_.InitRTPVideoHeaderVP9();
}
rtc::scoped_ptr<uint8_t[]> packet_;
rtc::scoped_ptr<uint8_t[]> payload_;
size_t payload_size_;
size_t payload_pos_;
RTPVideoHeaderVP9 expected_;
rtc::scoped_ptr<RtpPacketizerVp9> packetizer_;
void Init(size_t payload_size, size_t packet_size) {
payload_.reset(new uint8_t[payload_size]);
memset(payload_.get(), 7, payload_size);
payload_size_ = payload_size;
payload_pos_ = 0;
packetizer_.reset(new RtpPacketizerVp9(expected_, packet_size));
packetizer_->SetPayloadData(payload_.get(), payload_size_, NULL);
const int kMaxPayloadDescriptorLength = 100;
packet_.reset(new uint8_t[payload_size_ + kMaxPayloadDescriptorLength]);
}
void CheckPayload(const uint8_t* packet,
size_t start_pos,
size_t end_pos,
bool last) {
for (size_t i = start_pos; i < end_pos; ++i) {
EXPECT_EQ(packet[i], payload_[payload_pos_++]);
}
EXPECT_EQ(last, payload_pos_ == payload_size_);
}
void CreateParseAndCheckPackets(const size_t* expected_hdr_sizes,
const size_t* expected_sizes,
size_t expected_num_packets) {
ASSERT_TRUE(packetizer_.get() != NULL);
size_t length = 0;
bool last = false;
if (expected_num_packets == 0) {
EXPECT_FALSE(packetizer_->NextPacket(packet_.get(), &length, &last));
return;
}
for (size_t i = 0; i < expected_num_packets; ++i) {
EXPECT_TRUE(packetizer_->NextPacket(packet_.get(), &length, &last));
EXPECT_EQ(expected_sizes[i], length);
RTPVideoHeaderVP9 hdr = expected_;
hdr.beginning_of_frame = (i == 0);
hdr.end_of_frame = last;
ParseAndCheckPacket(packet_.get(), hdr, expected_hdr_sizes[i], length);
CheckPayload(packet_.get(), expected_hdr_sizes[i], length, last);
}
EXPECT_TRUE(last);
}
};
TEST_F(RtpPacketizerVp9Test, TestEqualSizedMode_OnePacket) {
const size_t kFrameSize = 25;
const size_t kPacketSize = 26;
Init(kFrameSize, kPacketSize);
// One packet:
// I:0, P:0, L:0, F:0, B:1, E:1, V:0 (1hdr + 25 payload)
const size_t kExpectedHdrSizes[] = {1};
const size_t kExpectedSizes[] = {26};
const size_t kExpectedNum = GTEST_ARRAY_SIZE_(kExpectedSizes);
CreateParseAndCheckPackets(kExpectedHdrSizes, kExpectedSizes, kExpectedNum);
}
TEST_F(RtpPacketizerVp9Test, TestEqualSizedMode_TwoPackets) {
const size_t kFrameSize = 27;
const size_t kPacketSize = 27;
Init(kFrameSize, kPacketSize);
// Two packets:
// I:0, P:0, L:0, F:0, B:1, E:0, V:0 (1hdr + 14 payload)
// I:0, P:0, L:0, F:0, B:0, E:1, V:0 (1hdr + 13 payload)
const size_t kExpectedHdrSizes[] = {1, 1};
const size_t kExpectedSizes[] = {15, 14};
const size_t kExpectedNum = GTEST_ARRAY_SIZE_(kExpectedSizes);
CreateParseAndCheckPackets(kExpectedHdrSizes, kExpectedSizes, kExpectedNum);
}
TEST_F(RtpPacketizerVp9Test, TestTooShortBufferToFitPayload) {
const size_t kFrameSize = 1;
const size_t kPacketSize = 1;
Init(kFrameSize, kPacketSize); // 1hdr + 1 payload
const size_t kExpectedNum = 0;
CreateParseAndCheckPackets(NULL, NULL, kExpectedNum);
}
TEST_F(RtpPacketizerVp9Test, TestOneBytePictureId) {
const size_t kFrameSize = 30;
const size_t kPacketSize = 12;
expected_.picture_id = kMaxOneBytePictureId; // 2 byte payload descriptor
expected_.max_picture_id = kMaxOneBytePictureId;
Init(kFrameSize, kPacketSize);
// Three packets:
// I:1, P:0, L:0, F:0, B:1, E:0, V:0 (2hdr + 10 payload)
// I:1, P:0, L:0, F:0, B:0, E:0, V:0 (2hdr + 10 payload)
// I:1, P:0, L:0, F:0, B:0, E:1, V:0 (2hdr + 10 payload)
const size_t kExpectedHdrSizes[] = {2, 2, 2};
const size_t kExpectedSizes[] = {12, 12, 12};
const size_t kExpectedNum = GTEST_ARRAY_SIZE_(kExpectedSizes);
CreateParseAndCheckPackets(kExpectedHdrSizes, kExpectedSizes, kExpectedNum);
}
TEST_F(RtpPacketizerVp9Test, TestTwoBytePictureId) {
const size_t kFrameSize = 31;
const size_t kPacketSize = 13;
expected_.picture_id = kMaxTwoBytePictureId; // 3 byte payload descriptor
Init(kFrameSize, kPacketSize);
// Four packets:
// I:1, P:0, L:0, F:0, B:1, E:0, V:0 (3hdr + 8 payload)
// I:1, P:0, L:0, F:0, B:0, E:0, V:0 (3hdr + 8 payload)
// I:1, P:0, L:0, F:0, B:0, E:0, V:0 (3hdr + 8 payload)
// I:1, P:0, L:0, F:0, B:0, E:1, V:0 (3hdr + 7 payload)
const size_t kExpectedHdrSizes[] = {3, 3, 3, 3};
const size_t kExpectedSizes[] = {11, 11, 11, 10};
const size_t kExpectedNum = GTEST_ARRAY_SIZE_(kExpectedSizes);
CreateParseAndCheckPackets(kExpectedHdrSizes, kExpectedSizes, kExpectedNum);
}
TEST_F(RtpPacketizerVp9Test, TestLayerInfoWithNonFlexibleMode) {
const size_t kFrameSize = 30;
const size_t kPacketSize = 25;
expected_.temporal_idx = 3;
expected_.temporal_up_switch = true; // U
expected_.num_spatial_layers = 3;
expected_.spatial_idx = 2;
expected_.inter_layer_predicted = true; // D
expected_.tl0_pic_idx = 117;
Init(kFrameSize, kPacketSize);
// Two packets:
// | I:0, P:0, L:1, F:0, B:1, E:0, V:0 | (3hdr + 15 payload)
// L: | T:3, U:1, S:2, D:1 | TL0PICIDX:117 |
// | I:0, P:0, L:1, F:0, B:0, E:1, V:0 | (3hdr + 15 payload)
// L: | T:3, U:1, S:2, D:1 | TL0PICIDX:117 |
const size_t kExpectedHdrSizes[] = {3, 3};
const size_t kExpectedSizes[] = {18, 18};
const size_t kExpectedNum = GTEST_ARRAY_SIZE_(kExpectedSizes);
CreateParseAndCheckPackets(kExpectedHdrSizes, kExpectedSizes, kExpectedNum);
}
TEST_F(RtpPacketizerVp9Test, TestLayerInfoWithFlexibleMode) {
const size_t kFrameSize = 21;
const size_t kPacketSize = 23;
expected_.flexible_mode = true;
expected_.temporal_idx = 3;
expected_.temporal_up_switch = true; // U
expected_.num_spatial_layers = 3;
expected_.spatial_idx = 2;
expected_.inter_layer_predicted = false; // D
Init(kFrameSize, kPacketSize);
// One packet:
// I:0, P:0, L:1, F:1, B:1, E:1, V:0 (2hdr + 21 payload)
// L: T:3, U:1, S:2, D:0
const size_t kExpectedHdrSizes[] = {2};
const size_t kExpectedSizes[] = {23};
const size_t kExpectedNum = GTEST_ARRAY_SIZE_(kExpectedSizes);
CreateParseAndCheckPackets(kExpectedHdrSizes, kExpectedSizes, kExpectedNum);
}
TEST_F(RtpPacketizerVp9Test, TestRefIdx) {
const size_t kFrameSize = 16;
const size_t kPacketSize = 21;
expected_.inter_pic_predicted = true; // P
expected_.flexible_mode = true; // F
expected_.picture_id = 2;
expected_.max_picture_id = kMaxOneBytePictureId;
expected_.num_ref_pics = 3;
expected_.pid_diff[0] = 1;
expected_.pid_diff[1] = 3;
expected_.pid_diff[2] = 127;
expected_.ref_picture_id[0] = 1; // 2 - 1 = 1
expected_.ref_picture_id[1] = 127; // (kMaxPictureId + 1) + 2 - 3 = 127
expected_.ref_picture_id[2] = 3; // (kMaxPictureId + 1) + 2 - 127 = 3
Init(kFrameSize, kPacketSize);
// Two packets:
// I:1, P:1, L:0, F:1, B:1, E:1, V:0 (5hdr + 16 payload)
// I: 2
// P,F: P_DIFF:1, N:1
// P_DIFF:3, N:1
// P_DIFF:127, N:0
const size_t kExpectedHdrSizes[] = {5};
const size_t kExpectedSizes[] = {21};
const size_t kExpectedNum = GTEST_ARRAY_SIZE_(kExpectedSizes);
CreateParseAndCheckPackets(kExpectedHdrSizes, kExpectedSizes, kExpectedNum);
}
TEST_F(RtpPacketizerVp9Test, TestRefIdxFailsWithoutPictureId) {
const size_t kFrameSize = 16;
const size_t kPacketSize = 21;
expected_.inter_pic_predicted = true;
expected_.flexible_mode = true;
expected_.num_ref_pics = 1;
expected_.pid_diff[0] = 3;
Init(kFrameSize, kPacketSize);
const size_t kExpectedNum = 0;
CreateParseAndCheckPackets(NULL, NULL, kExpectedNum);
}
TEST_F(RtpPacketizerVp9Test, TestSsDataWithoutSpatialResolutionPresent) {
const size_t kFrameSize = 21;
const size_t kPacketSize = 26;
expected_.ss_data_available = true;
expected_.num_spatial_layers = 1;
expected_.spatial_layer_resolution_present = false;
expected_.gof.num_frames_in_gof = 1;
expected_.gof.temporal_idx[0] = 0;
expected_.gof.temporal_up_switch[0] = true;
expected_.gof.num_ref_pics[0] = 1;
expected_.gof.pid_diff[0][0] = 4;
Init(kFrameSize, kPacketSize);
// One packet:
// I:0, P:0, L:0, F:0, B:1, E:1, V:1 (5hdr + 21 payload)
// N_S:0, Y:0, G:1
// N_G:1
// T:0, U:1, R:1 | P_DIFF[0][0]:4
const size_t kExpectedHdrSizes[] = {5};
const size_t kExpectedSizes[] = {26};
const size_t kExpectedNum = GTEST_ARRAY_SIZE_(kExpectedSizes);
CreateParseAndCheckPackets(kExpectedHdrSizes, kExpectedSizes, kExpectedNum);
}
TEST_F(RtpPacketizerVp9Test, TestSsDataWithoutGbitPresent) {
const size_t kFrameSize = 21;
const size_t kPacketSize = 23;
expected_.ss_data_available = true;
expected_.num_spatial_layers = 1;
expected_.spatial_layer_resolution_present = false;
expected_.gof.num_frames_in_gof = 0;
Init(kFrameSize, kPacketSize);
// One packet:
// I:0, P:0, L:0, F:0, B:1, E:1, V:1 (2hdr + 21 payload)
// N_S:0, Y:0, G:0
const size_t kExpectedHdrSizes[] = {2};
const size_t kExpectedSizes[] = {23};
const size_t kExpectedNum = GTEST_ARRAY_SIZE_(kExpectedSizes);
CreateParseAndCheckPackets(kExpectedHdrSizes, kExpectedSizes, kExpectedNum);
}
TEST_F(RtpPacketizerVp9Test, TestSsData) {
const size_t kFrameSize = 21;
const size_t kPacketSize = 40;
expected_.ss_data_available = true;
expected_.num_spatial_layers = 2;
expected_.spatial_layer_resolution_present = true;
expected_.width[0] = 640;
expected_.width[1] = 1280;
expected_.height[0] = 360;
expected_.height[1] = 720;
expected_.gof.num_frames_in_gof = 3;
expected_.gof.temporal_idx[0] = 0;
expected_.gof.temporal_idx[1] = 1;
expected_.gof.temporal_idx[2] = 2;
expected_.gof.temporal_up_switch[0] = true;
expected_.gof.temporal_up_switch[1] = true;
expected_.gof.temporal_up_switch[2] = false;
expected_.gof.num_ref_pics[0] = 0;
expected_.gof.num_ref_pics[1] = 3;
expected_.gof.num_ref_pics[2] = 2;
expected_.gof.pid_diff[1][0] = 5;
expected_.gof.pid_diff[1][1] = 6;
expected_.gof.pid_diff[1][2] = 7;
expected_.gof.pid_diff[2][0] = 8;
expected_.gof.pid_diff[2][1] = 9;
Init(kFrameSize, kPacketSize);
// One packet:
// I:0, P:0, L:0, F:0, B:1, E:1, V:1 (19hdr + 21 payload)
// N_S:1, Y:1, G:1
// WIDTH:640 // 2 bytes
// HEIGHT:360 // 2 bytes
// WIDTH:1280 // 2 bytes
// HEIGHT:720 // 2 bytes
// N_G:3
// T:0, U:1, R:0
// T:1, U:1, R:3 | P_DIFF[1][0]:5 | P_DIFF[1][1]:6 | P_DIFF[1][2]:7
// T:2, U:0, R:2 | P_DIFF[2][0]:8 | P_DIFF[2][1]:9
const size_t kExpectedHdrSizes[] = {19};
const size_t kExpectedSizes[] = {40};
const size_t kExpectedNum = GTEST_ARRAY_SIZE_(kExpectedSizes);
CreateParseAndCheckPackets(kExpectedHdrSizes, kExpectedSizes, kExpectedNum);
}
TEST_F(RtpPacketizerVp9Test, TestBaseLayerProtectionAndStorageType) {
const size_t kFrameSize = 10;
const size_t kPacketSize = 12;
// I:0, P:0, L:1, F:1, B:1, E:1, V:0 (2hdr + 10 payload)
// L: T:0, U:0, S:0, D:0
expected_.flexible_mode = true;
expected_.temporal_idx = 0;
Init(kFrameSize, kPacketSize);
EXPECT_EQ(kProtectedPacket, packetizer_->GetProtectionType());
EXPECT_EQ(kAllowRetransmission,
packetizer_->GetStorageType(kRetransmitBaseLayer));
EXPECT_EQ(kDontRetransmit, packetizer_->GetStorageType(kRetransmitOff));
}
TEST_F(RtpPacketizerVp9Test, TestHigherLayerProtectionAndStorageType) {
const size_t kFrameSize = 10;
const size_t kPacketSize = 12;
// I:0, P:0, L:1, F:1, B:1, E:1, V:0 (2hdr + 10 payload)
// L: T:1, U:0, S:0, D:0
expected_.flexible_mode = true;
expected_.temporal_idx = 1;
Init(kFrameSize, kPacketSize);
EXPECT_EQ(kUnprotectedPacket, packetizer_->GetProtectionType());
EXPECT_EQ(kDontRetransmit, packetizer_->GetStorageType(kRetransmitBaseLayer));
EXPECT_EQ(kAllowRetransmission,
packetizer_->GetStorageType(kRetransmitHigherLayers));
}
class RtpDepacketizerVp9Test : public ::testing::Test {
protected:
RtpDepacketizerVp9Test()
: depacketizer_(new RtpDepacketizerVp9()) {}
virtual void SetUp() {
expected_.InitRTPVideoHeaderVP9();
}
RTPVideoHeaderVP9 expected_;
rtc::scoped_ptr<RtpDepacketizer> depacketizer_;
};
TEST_F(RtpDepacketizerVp9Test, ParseBasicHeader) {
const uint8_t kHeaderLength = 1;
uint8_t packet[4] = {0};
packet[0] = 0x0C; // I:0 P:0 L:0 F:0 B:1 E:1 V:0 R:0
expected_.beginning_of_frame = true;
expected_.end_of_frame = true;
ParseAndCheckPacket(packet, expected_, kHeaderLength, sizeof(packet));
}
TEST_F(RtpDepacketizerVp9Test, ParseOneBytePictureId) {
const uint8_t kHeaderLength = 2;
uint8_t packet[10] = {0};
packet[0] = 0x80; // I:1 P:0 L:0 F:0 B:0 E:0 V:0 R:0
packet[1] = kMaxOneBytePictureId;
expected_.picture_id = kMaxOneBytePictureId;
expected_.max_picture_id = kMaxOneBytePictureId;
ParseAndCheckPacket(packet, expected_, kHeaderLength, sizeof(packet));
}
TEST_F(RtpDepacketizerVp9Test, ParseTwoBytePictureId) {
const uint8_t kHeaderLength = 3;
uint8_t packet[10] = {0};
packet[0] = 0x80; // I:1 P:0 L:0 F:0 B:0 E:0 V:0 R:0
packet[1] = 0x80 | ((kMaxTwoBytePictureId >> 8) & 0x7F);
packet[2] = kMaxTwoBytePictureId & 0xFF;
expected_.picture_id = kMaxTwoBytePictureId;
expected_.max_picture_id = kMaxTwoBytePictureId;
ParseAndCheckPacket(packet, expected_, kHeaderLength, sizeof(packet));
}
TEST_F(RtpDepacketizerVp9Test, ParseLayerInfoWithNonFlexibleMode) {
const uint8_t kHeaderLength = 3;
const uint8_t kTemporalIdx = 2;
const uint8_t kUbit = 1;
const uint8_t kSpatialIdx = 1;
const uint8_t kDbit = 1;
const uint8_t kTl0PicIdx = 17;
uint8_t packet[13] = {0};
packet[0] = 0x20; // I:0 P:0 L:1 F:0 B:0 E:0 V:0 R:0
packet[1] = (kTemporalIdx << 5) | (kUbit << 4) | (kSpatialIdx << 1) | kDbit;
packet[2] = kTl0PicIdx;
// T:2 U:1 S:1 D:1
// TL0PICIDX:17
expected_.temporal_idx = kTemporalIdx;
expected_.temporal_up_switch = kUbit ? true : false;
expected_.spatial_idx = kSpatialIdx;
expected_.inter_layer_predicted = kDbit ? true : false;
expected_.tl0_pic_idx = kTl0PicIdx;
ParseAndCheckPacket(packet, expected_, kHeaderLength, sizeof(packet));
}
TEST_F(RtpDepacketizerVp9Test, ParseLayerInfoWithFlexibleMode) {
const uint8_t kHeaderLength = 2;
const uint8_t kTemporalIdx = 2;
const uint8_t kUbit = 1;
const uint8_t kSpatialIdx = 0;
const uint8_t kDbit = 0;
uint8_t packet[13] = {0};
packet[0] = 0x38; // I:0 P:0 L:1 F:1 B:1 E:0 V:0 R:0
packet[1] = (kTemporalIdx << 5) | (kUbit << 4) | (kSpatialIdx << 1) | kDbit;
// I:0 P:0 L:1 F:1 B:1 E:0 V:0
// L: T:2 U:1 S:0 D:0
expected_.beginning_of_frame = true;
expected_.flexible_mode = true;
expected_.temporal_idx = kTemporalIdx;
expected_.temporal_up_switch = kUbit ? true : false;
expected_.spatial_idx = kSpatialIdx;
expected_.inter_layer_predicted = kDbit ? true : false;
ParseAndCheckPacket(packet, expected_, kHeaderLength, sizeof(packet));
}
TEST_F(RtpDepacketizerVp9Test, ParseRefIdx) {
const uint8_t kHeaderLength = 6;
const int16_t kPictureId = 17;
const uint8_t kPdiff1 = 17;
const uint8_t kPdiff2 = 18;
const uint8_t kPdiff3 = 127;
uint8_t packet[13] = {0};
packet[0] = 0xD8; // I:1 P:1 L:0 F:1 B:1 E:0 V:0 R:0
packet[1] = 0x80 | ((kPictureId >> 8) & 0x7F); // Two byte pictureID.
packet[2] = kPictureId;
packet[3] = (kPdiff1 << 1) | 1; // P_DIFF N:1
packet[4] = (kPdiff2 << 1) | 1; // P_DIFF N:1
packet[5] = (kPdiff3 << 1) | 0; // P_DIFF N:0
// I:1 P:1 L:0 F:1 B:1 E:0 V:0
// I: PICTURE ID:17
// I:
// P,F: P_DIFF:17 N:1 => refPicId = 17 - 17 = 0
// P,F: P_DIFF:18 N:1 => refPicId = (kMaxPictureId + 1) + 17 - 18 = 0x7FFF
// P,F: P_DIFF:127 N:0 => refPicId = (kMaxPictureId + 1) + 17 - 127 = 32658
expected_.beginning_of_frame = true;
expected_.inter_pic_predicted = true;
expected_.flexible_mode = true;
expected_.picture_id = kPictureId;
expected_.num_ref_pics = 3;
expected_.pid_diff[0] = kPdiff1;
expected_.pid_diff[1] = kPdiff2;
expected_.pid_diff[2] = kPdiff3;
expected_.ref_picture_id[0] = 0;
expected_.ref_picture_id[1] = 0x7FFF;
expected_.ref_picture_id[2] = 32658;
ParseAndCheckPacket(packet, expected_, kHeaderLength, sizeof(packet));
}
TEST_F(RtpDepacketizerVp9Test, ParseRefIdxFailsWithNoPictureId) {
const uint8_t kPdiff = 3;
uint8_t packet[13] = {0};
packet[0] = 0x58; // I:0 P:1 L:0 F:1 B:1 E:0 V:0 R:0
packet[1] = (kPdiff << 1); // P,F: P_DIFF:3 N:0
RtpDepacketizer::ParsedPayload parsed;
EXPECT_FALSE(depacketizer_->Parse(&parsed, packet, sizeof(packet)));
}
TEST_F(RtpDepacketizerVp9Test, ParseRefIdxFailsWithTooManyRefPics) {
const uint8_t kPdiff = 3;
uint8_t packet[13] = {0};
packet[0] = 0xD8; // I:1 P:1 L:0 F:1 B:1 E:0 V:0 R:0
packet[1] = kMaxOneBytePictureId; // I: PICTURE ID:127
packet[2] = (kPdiff << 1) | 1; // P,F: P_DIFF:3 N:1
packet[3] = (kPdiff << 1) | 1; // P,F: P_DIFF:3 N:1
packet[4] = (kPdiff << 1) | 1; // P,F: P_DIFF:3 N:1
packet[5] = (kPdiff << 1) | 0; // P,F: P_DIFF:3 N:0
RtpDepacketizer::ParsedPayload parsed;
EXPECT_FALSE(depacketizer_->Parse(&parsed, packet, sizeof(packet)));
}
TEST_F(RtpDepacketizerVp9Test, ParseSsData) {
const uint8_t kHeaderLength = 6;
const uint8_t kYbit = 0;
const size_t kNs = 2;
const size_t kNg = 2;
uint8_t packet[23] = {0};
packet[0] = 0x0A; // I:0 P:0 L:0 F:0 B:1 E:0 V:1 R:0
packet[1] = ((kNs - 1) << 5) | (kYbit << 4) | (1 << 3); // N_S Y G:1 -
packet[2] = kNg; // N_G
packet[3] = (0 << 5) | (1 << 4) | (0 << 2) | 0; // T:0 U:1 R:0 -
packet[4] = (2 << 5) | (0 << 4) | (1 << 2) | 0; // T:2 U:0 R:1 -
packet[5] = 33;
expected_.beginning_of_frame = true;
expected_.ss_data_available = true;
expected_.num_spatial_layers = kNs;
expected_.spatial_layer_resolution_present = kYbit ? true : false;
expected_.gof.num_frames_in_gof = kNg;
expected_.gof.temporal_idx[0] = 0;
expected_.gof.temporal_idx[1] = 2;
expected_.gof.temporal_up_switch[0] = true;
expected_.gof.temporal_up_switch[1] = false;
expected_.gof.num_ref_pics[0] = 0;
expected_.gof.num_ref_pics[1] = 1;
expected_.gof.pid_diff[1][0] = 33;
ParseAndCheckPacket(packet, expected_, kHeaderLength, sizeof(packet));
}
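For orientation, packet[1] above packs the scalability-structure header into one byte: N_S in the top three bits, then the Y and G flags. A small sketch assuming that layout (helper name made up, not part of the patch):
// Illustration only: packing of the first SS byte, mirroring packet[1] above.
uint8_t PackSsByte(int num_spatial_layers, bool y_bit, bool g_bit) {
  return static_cast<uint8_t>(((num_spatial_layers - 1) << 5) |
                              ((y_bit ? 1 : 0) << 4) | ((g_bit ? 1 : 0) << 3));
}
// PackSsByte(2, false, true) == 0x28, the value written to packet[1] in ParseSsData.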
TEST_F(RtpDepacketizerVp9Test, ParseFirstPacketInKeyFrame) {
uint8_t packet[2] = {0};
packet[0] = 0x08; // I:0 P:0 L:0 F:0 B:1 E:0 V:0 R:0
RtpDepacketizer::ParsedPayload parsed;
ASSERT_TRUE(depacketizer_->Parse(&parsed, packet, sizeof(packet)));
EXPECT_EQ(kVideoFrameKey, parsed.frame_type);
EXPECT_TRUE(parsed.type.Video.isFirstPacket);
}
TEST_F(RtpDepacketizerVp9Test, ParseLastPacketInDeltaFrame) {
uint8_t packet[2] = {0};
packet[0] = 0x44; // I:0 P:1 L:0 F:0 B:0 E:1 V:0 R:0
RtpDepacketizer::ParsedPayload parsed;
ASSERT_TRUE(depacketizer_->Parse(&parsed, packet, sizeof(packet)));
EXPECT_EQ(kVideoFrameDelta, parsed.frame_type);
EXPECT_FALSE(parsed.type.Video.isFirstPacket);
}
TEST_F(RtpDepacketizerVp9Test, ParseResolution) {
const uint16_t kWidth[2] = {640, 1280};
const uint16_t kHeight[2] = {360, 720};
uint8_t packet[20] = {0};
packet[0] = 0x0A; // I:0 P:0 L:0 F:0 B:1 E:0 V:1 R:0
packet[1] = (1 << 5) | (1 << 4) | 0; // N_S:1 Y:1 G:0
packet[2] = kWidth[0] >> 8;
packet[3] = kWidth[0] & 0xFF;
packet[4] = kHeight[0] >> 8;
packet[5] = kHeight[0] & 0xFF;
packet[6] = kWidth[1] >> 8;
packet[7] = kWidth[1] & 0xFF;
packet[8] = kHeight[1] >> 8;
packet[9] = kHeight[1] & 0xFF;
RtpDepacketizer::ParsedPayload parsed;
ASSERT_TRUE(depacketizer_->Parse(&parsed, packet, sizeof(packet)));
EXPECT_EQ(kWidth[0], parsed.type.Video.width);
EXPECT_EQ(kHeight[0], parsed.type.Video.height);
}
TEST_F(RtpDepacketizerVp9Test, ParseFailsForNoPayloadLength) {
uint8_t packet[1] = {0};
RtpDepacketizer::ParsedPayload parsed;
EXPECT_FALSE(depacketizer_->Parse(&parsed, packet, 0));
}
TEST_F(RtpDepacketizerVp9Test, ParseFailsForTooShortBufferToFitPayload) {
const uint8_t kHeaderLength = 1;
uint8_t packet[kHeaderLength] = {0};
RtpDepacketizer::ParsedPayload parsed;
EXPECT_FALSE(depacketizer_->Parse(&parsed, packet, sizeof(packet)));
}
} // namespace webrtc

View File

@ -14,11 +14,15 @@
#include <stdlib.h>
#include <string.h>
#include "webrtc/base/checks.h"
#include "webrtc/base/logging.h"
#include "webrtc/base/trace_event.h"
#include "webrtc/modules/rtp_rtcp/interface/rtp_rtcp_defines.h"
#include "webrtc/modules/rtp_rtcp/source/byte_io.h"
#include "webrtc/modules/rtp_rtcp/source/producer_fec.h"
#include "webrtc/modules/rtp_rtcp/source/rtp_format_video_generic.h"
#include "webrtc/modules/rtp_rtcp/source/rtp_format_vp8.h"
#include "webrtc/modules/rtp_rtcp/source/rtp_format_vp9.h"
#include "webrtc/modules/rtp_rtcp/source/rtp_format_h264.h"
#include "webrtc/system_wrappers/interface/critical_section_wrapper.h"
#include "webrtc/system_wrappers/interface/logging.h"
@ -323,7 +327,7 @@ bool RTPSenderVideo::Send(const RtpVideoCodecTypes videoType,
// output multiple partitions for VP8. Should remove below check after the
// issue is fixed.
const RTPFragmentationHeader* frag =
(videoType == kRtpVideoVp8 || videoType == kRtpVideoVp9) ? NULL : fragmentation;
(videoType == kRtpVideoVp8) ? NULL : fragmentation;
packetizer->SetPayloadData(data, payload_bytes_to_send, frag);
@ -360,7 +364,7 @@ bool RTPSenderVideo::Send(const RtpVideoCodecTypes videoType,
// a lock. It'll be a no-op if it's not registered.
// TODO(guoweis): For now, all packets sent will carry the CVO such that
// the RTP header length is consistent, although the receiver side will
// only exam the packets with market bit set.
// only exam the packets with marker bit set.
size_t packetSize = payloadSize + rtp_header_length;
RtpUtility::RtpHeaderParser rtp_parser(dataBuffer, packetSize);
RTPHeader rtp_header;

View File

@ -48,11 +48,30 @@ struct CodecSpecificInfoVP9 {
bool hasReceivedRPSI;
uint64_t pictureIdRPSI;
int16_t pictureId; // Negative value to skip pictureId.
bool nonReference;
uint8_t temporalIdx;
bool layerSync;
int tl0PicIdx; // Negative value to skip tl0PicIdx.
int8_t keyIdx; // Negative value to skip keyIdx.
bool inter_pic_predicted; // This layer frame is dependent on previously
// coded frame(s).
bool flexible_mode;
bool ss_data_available;
int tl0_pic_idx; // Negative value to skip tl0PicIdx.
uint8_t temporal_idx;
uint8_t spatial_idx;
bool temporal_up_switch;
bool inter_layer_predicted; // Frame is dependent on directly lower spatial
// layer frame.
uint8_t gof_idx;
// SS data.
size_t num_spatial_layers; // Always populated.
bool spatial_layer_resolution_present;
uint16_t width[kMaxVp9NumberOfSpatialLayers];
uint16_t height[kMaxVp9NumberOfSpatialLayers];
GofInfoVP9 gof;
// Frame reference data.
uint8_t num_ref_pics;
uint8_t p_diff[kMaxVp9RefPics];
};
struct CodecSpecificInfoGeneric {

View File

@ -0,0 +1,93 @@
/* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <algorithm>
#include "webrtc/modules/video_coding/codecs/vp9/screenshare_layers.h"
#include "webrtc/base/checks.h"
namespace webrtc {
ScreenshareLayersVP9::ScreenshareLayersVP9(uint8_t num_layers)
: num_layers_(num_layers),
start_layer_(0),
last_timestamp_(0),
timestamp_initialized_(false) {
RTC_DCHECK_GT(num_layers, 0);
RTC_DCHECK_LE(num_layers, kMaxVp9NumberOfSpatialLayers);
memset(bits_used_, 0, sizeof(bits_used_));
memset(threshold_kbps_, 0, sizeof(threshold_kbps_));
}
uint8_t ScreenshareLayersVP9::GetStartLayer() const {
return start_layer_;
}
void ScreenshareLayersVP9::ConfigureBitrate(int threshold_kbps,
uint8_t layer_id) {
// The upper layer is always the layer we spill frames
// to when the bitrate becomes too high, therefore setting
// a max limit is not allowed. The top layer bitrate is
// never used either so configuring it makes no difference.
RTC_DCHECK_LT(layer_id, num_layers_ - 1);
threshold_kbps_[layer_id] = threshold_kbps;
}
void ScreenshareLayersVP9::LayerFrameEncoded(unsigned int size_bytes,
uint8_t layer_id) {
RTC_DCHECK_LT(layer_id, num_layers_);
bits_used_[layer_id] += size_bytes * 8;
}
VP9EncoderImpl::SuperFrameRefSettings
ScreenshareLayersVP9::GetSuperFrameSettings(uint32_t timestamp,
bool is_keyframe) {
VP9EncoderImpl::SuperFrameRefSettings settings;
if (!timestamp_initialized_) {
last_timestamp_ = timestamp;
timestamp_initialized_ = true;
}
float time_diff = (timestamp - last_timestamp_) / 90.f;
float total_bits_used = 0;
float total_threshold_kbps = 0;
start_layer_ = 0;
// Up to (num_layers - 1) because we only have
// (num_layers - 1) thresholds to check.
for (int layer_id = 0; layer_id < num_layers_ - 1; ++layer_id) {
bits_used_[layer_id] = std::max(
0.f, bits_used_[layer_id] - time_diff * threshold_kbps_[layer_id]);
total_bits_used += bits_used_[layer_id];
total_threshold_kbps += threshold_kbps_[layer_id];
// If this is a keyframe then there should be no
// references to any previous frames.
if (!is_keyframe) {
settings.layer[layer_id].ref_buf1 = layer_id;
if (total_bits_used > total_threshold_kbps * 1000)
start_layer_ = layer_id + 1;
}
settings.layer[layer_id].upd_buf = layer_id;
}
// Since the above loop does not iterate over the last layer
// the reference of the last layer has to be set after the loop,
// and if this is a keyframe there should be no references to
// any previous frames.
if (!is_keyframe)
settings.layer[num_layers_ - 1].ref_buf1 = num_layers_ - 1;
settings.layer[num_layers_ - 1].upd_buf = num_layers_ - 1;
settings.is_keyframe = is_keyframe;
settings.start_layer = start_layer_;
settings.stop_layer = num_layers_ - 1;
last_timestamp_ = timestamp;
return settings;
}
} // namespace webrtc

View File

@ -0,0 +1,66 @@
/* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_VIDEO_CODING_CODECS_VP9_SCREENSHARE_LAYERS_H_
#define WEBRTC_MODULES_VIDEO_CODING_CODECS_VP9_SCREENSHARE_LAYERS_H_
#include "webrtc/modules/video_coding/codecs/vp9/vp9_impl.h"
namespace webrtc {
class ScreenshareLayersVP9 {
public:
explicit ScreenshareLayersVP9(uint8_t num_layers);
// The target bitrate for layer with id layer_id.
void ConfigureBitrate(int threshold_kbps, uint8_t layer_id);
// The current start layer.
uint8_t GetStartLayer() const;
// Update the layer with the size of the layer frame.
void LayerFrameEncoded(unsigned int size_bytes, uint8_t layer_id);
// Get the layer settings for the next superframe.
//
// In short, each time GetSuperFrameSettings is called, the bitrate of every
// layer is calculated. If the cumulative bitrate up to and including a layer
// exceeds the cumulative bitrate configured for it (via ConfigureBitrate),
// the resulting encoding settings for the superframe will only encode the
// layers above that layer.
VP9EncoderImpl::SuperFrameRefSettings GetSuperFrameSettings(
uint32_t timestamp,
bool is_keyframe);
private:
// The number of layers in use.
uint8_t num_layers_;
// The index of the first layer to encode.
uint8_t start_layer_;
// Cumulative target kbps for the different layers.
float threshold_kbps_[kMaxVp9NumberOfSpatialLayers - 1];
// How many bits have been used for a certain layer. Increased in
// LayerFrameEncoded() by the size of the encoded frame and decreased in
// GetSuperFrameSettings() depending on the time between frames.
float bits_used_[kMaxVp9NumberOfSpatialLayers];
// Timestamp of last frame.
uint32_t last_timestamp_;
// If the last_timestamp_ has been set.
bool timestamp_initialized_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_VIDEO_CODING_CODECS_VP9_SCREENSHARE_LAYERS_H_
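A minimal usage sketch of the class above, using only the API declared here; the frame loop and the numbers are illustrative, not part of the patch:
#include "webrtc/modules/video_coding/codecs/vp9/screenshare_layers.h"
// Illustration only: typical per-frame call sequence for ScreenshareLayersVP9.
// Setup (once): webrtc::ScreenshareLayersVP9 layers(2); layers.ConfigureBitrate(150, 0);
void EncodeScreenshareFrame(webrtc::ScreenshareLayersVP9* layers,
                            uint32_t rtp_timestamp,
                            bool is_keyframe) {
  // Ask which spatial layers to encode and which buffers to reference/update.
  webrtc::VP9EncoderImpl::SuperFrameRefSettings settings =
      layers->GetSuperFrameSettings(rtp_timestamp, is_keyframe);
  // Encode layers settings.start_layer .. settings.stop_layer, then report the
  // encoded size of each layer frame so the bitrate accounting stays current:
  //   layers->LayerFrameEncoded(encoded_bytes, layer_id);
}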

View File

@ -0,0 +1,323 @@
/*
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <limits>
#include "testing/gtest/include/gtest/gtest.h"
#include "vpx/vp8cx.h"
#include "webrtc/base/logging.h"
#include "webrtc/modules/video_coding/codecs/vp9/screenshare_layers.h"
#include "webrtc/modules/video_coding/codecs/vp9/vp9_impl.h"
#include "webrtc/system_wrappers/include/clock.h"
namespace webrtc {
typedef VP9EncoderImpl::SuperFrameRefSettings Settings;
const uint32_t kTickFrequency = 90000;
class ScreenshareLayerTestVP9 : public ::testing::Test {
protected:
ScreenshareLayerTestVP9() : clock_(0) {}
virtual ~ScreenshareLayerTestVP9() {}
void InitScreenshareLayers(int layers) {
layers_.reset(new ScreenshareLayersVP9(layers));
}
void ConfigureBitrateForLayer(int kbps, uint8_t layer_id) {
layers_->ConfigureBitrate(kbps, layer_id);
}
void AdvanceTime(int64_t milliseconds) {
clock_.AdvanceTimeMilliseconds(milliseconds);
}
void AddKilobitsToLayer(int kilobits, uint8_t layer_id) {
layers_->LayerFrameEncoded(kilobits * 1000 / 8, layer_id);
}
void EqualRefsForLayer(const Settings& actual, uint8_t layer_id) {
EXPECT_EQ(expected_.layer[layer_id].upd_buf,
actual.layer[layer_id].upd_buf);
EXPECT_EQ(expected_.layer[layer_id].ref_buf1,
actual.layer[layer_id].ref_buf1);
EXPECT_EQ(expected_.layer[layer_id].ref_buf2,
actual.layer[layer_id].ref_buf2);
EXPECT_EQ(expected_.layer[layer_id].ref_buf3,
actual.layer[layer_id].ref_buf3);
}
void EqualRefs(const Settings& actual) {
for (unsigned int layer_id = 0; layer_id < kMaxVp9NumberOfSpatialLayers;
++layer_id) {
EqualRefsForLayer(actual, layer_id);
}
}
void EqualStartStopKeyframe(const Settings& actual) {
EXPECT_EQ(expected_.start_layer, actual.start_layer);
EXPECT_EQ(expected_.stop_layer, actual.stop_layer);
EXPECT_EQ(expected_.is_keyframe, actual.is_keyframe);
}
// Check that the settings returned by GetSuperFrameSettings() are
// equal to the expected_ settings.
void EqualToExpected() {
uint32_t frame_timestamp_ =
clock_.TimeInMilliseconds() * (kTickFrequency / 1000);
Settings actual =
layers_->GetSuperFrameSettings(frame_timestamp_, expected_.is_keyframe);
EqualRefs(actual);
EqualStartStopKeyframe(actual);
}
Settings expected_;
SimulatedClock clock_;
rtc::scoped_ptr<ScreenshareLayersVP9> layers_;
};
TEST_F(ScreenshareLayerTestVP9, NoRefsOnKeyFrame) {
const int kNumLayers = kMaxVp9NumberOfSpatialLayers;
InitScreenshareLayers(kNumLayers);
expected_.start_layer = 0;
expected_.stop_layer = kNumLayers - 1;
for (int l = 0; l < kNumLayers; ++l) {
expected_.layer[l].upd_buf = l;
}
expected_.is_keyframe = true;
EqualToExpected();
for (int l = 0; l < kNumLayers; ++l) {
expected_.layer[l].ref_buf1 = l;
}
expected_.is_keyframe = false;
EqualToExpected();
}
// Test that it is not possible to send at a high bitrate (over the threshold)
// after a longer period of low bitrate.
TEST_F(ScreenshareLayerTestVP9, DontAccumelateAvailableBitsOverTime) {
InitScreenshareLayers(2);
ConfigureBitrateForLayer(100, 0);
expected_.layer[0].upd_buf = 0;
expected_.layer[0].ref_buf1 = 0;
expected_.layer[1].upd_buf = 1;
expected_.layer[1].ref_buf1 = 1;
expected_.start_layer = 0;
expected_.stop_layer = 1;
// Send 10 frames at a low bitrate (50 kbps)
for (int i = 0; i < 10; ++i) {
AdvanceTime(200);
EqualToExpected();
AddKilobitsToLayer(10, 0);
}
AdvanceTime(200);
EqualToExpected();
AddKilobitsToLayer(301, 0);
// Send 10 frames at a high bitrate (200 kbps)
expected_.start_layer = 1;
for (int i = 0; i < 10; ++i) {
AdvanceTime(200);
EqualToExpected();
AddKilobitsToLayer(40, 1);
}
}
// Test that used bits are accumulated over layers, as they should be.
TEST_F(ScreenshareLayerTestVP9, AccumelateUsedBitsOverLayers) {
const int kNumLayers = kMaxVp9NumberOfSpatialLayers;
InitScreenshareLayers(kNumLayers);
for (int l = 0; l < kNumLayers - 1; ++l)
ConfigureBitrateForLayer(100, l);
for (int l = 0; l < kNumLayers; ++l) {
expected_.layer[l].upd_buf = l;
expected_.layer[l].ref_buf1 = l;
}
expected_.start_layer = 0;
expected_.stop_layer = kNumLayers - 1;
EqualToExpected();
for (int layer = 0; layer < kNumLayers - 1; ++layer) {
expected_.start_layer = layer;
EqualToExpected();
AddKilobitsToLayer(101, layer);
}
}
// General testing of the bitrate controller.
TEST_F(ScreenshareLayerTestVP9, 2LayerBitrate) {
InitScreenshareLayers(2);
ConfigureBitrateForLayer(100, 0);
expected_.layer[0].upd_buf = 0;
expected_.layer[1].upd_buf = 1;
expected_.layer[0].ref_buf1 = -1;
expected_.layer[1].ref_buf1 = -1;
expected_.start_layer = 0;
expected_.stop_layer = 1;
expected_.is_keyframe = true;
EqualToExpected();
AddKilobitsToLayer(100, 0);
expected_.layer[0].ref_buf1 = 0;
expected_.layer[1].ref_buf1 = 1;
expected_.is_keyframe = false;
AdvanceTime(199);
EqualToExpected();
AddKilobitsToLayer(100, 0);
expected_.start_layer = 1;
for (int frame = 0; frame < 3; ++frame) {
AdvanceTime(200);
EqualToExpected();
AddKilobitsToLayer(100, 1);
}
// Just before enough bits become available for L0 @0.999 seconds.
AdvanceTime(199);
EqualToExpected();
AddKilobitsToLayer(100, 1);
// Just after enough bits become available for L0 @1.0001 seconds.
expected_.start_layer = 0;
AdvanceTime(2);
EqualToExpected();
AddKilobitsToLayer(100, 0);
// Keyframes always encode all layers, even if it is over budget.
expected_.layer[0].ref_buf1 = -1;
expected_.layer[1].ref_buf1 = -1;
expected_.is_keyframe = true;
AdvanceTime(499);
EqualToExpected();
expected_.layer[0].ref_buf1 = 0;
expected_.layer[1].ref_buf1 = 1;
expected_.start_layer = 1;
expected_.is_keyframe = false;
EqualToExpected();
AddKilobitsToLayer(100, 0);
// 400 kb in L0 --> @3 second mark to fall below the threshold.
// just before @2.999 seconds.
expected_.is_keyframe = false;
AdvanceTime(1499);
EqualToExpected();
AddKilobitsToLayer(100, 1);
// just after @3.001 seconds.
expected_.start_layer = 0;
AdvanceTime(2);
EqualToExpected();
AddKilobitsToLayer(100, 0);
}
// General testing of the bitrate controller.
TEST_F(ScreenshareLayerTestVP9, 3LayerBitrate) {
InitScreenshareLayers(3);
ConfigureBitrateForLayer(100, 0);
ConfigureBitrateForLayer(100, 1);
for (int l = 0; l < 3; ++l) {
expected_.layer[l].upd_buf = l;
expected_.layer[l].ref_buf1 = l;
}
expected_.start_layer = 0;
expected_.stop_layer = 2;
EqualToExpected();
AddKilobitsToLayer(105, 0);
AddKilobitsToLayer(30, 1);
AdvanceTime(199);
EqualToExpected();
AddKilobitsToLayer(105, 0);
AddKilobitsToLayer(30, 1);
expected_.start_layer = 1;
AdvanceTime(200);
EqualToExpected();
AddKilobitsToLayer(130, 1);
expected_.start_layer = 2;
AdvanceTime(200);
EqualToExpected();
// 400 kb in L1 --> @1.0 second mark to fall below threshold.
// 210 kb in L0 --> @1.1 second mark to fall below threshold.
// Just before L1 @0.999 seconds.
AdvanceTime(399);
EqualToExpected();
// Just after L1 @1.001 seconds.
expected_.start_layer = 1;
AdvanceTime(2);
EqualToExpected();
// Just before L0 @1.099 seconds.
AdvanceTime(99);
EqualToExpected();
// Just after L0 @1.101 seconds.
expected_.start_layer = 0;
AdvanceTime(2);
EqualToExpected();
// @1.1 seconds
AdvanceTime(99);
EqualToExpected();
AddKilobitsToLayer(200, 1);
expected_.is_keyframe = true;
for (int l = 0; l < 3; ++l)
expected_.layer[l].ref_buf1 = -1;
AdvanceTime(200);
EqualToExpected();
expected_.is_keyframe = false;
expected_.start_layer = 2;
for (int l = 0; l < 3; ++l)
expected_.layer[l].ref_buf1 = l;
AdvanceTime(200);
EqualToExpected();
}
// Test that the bitrate calculations are
// correct when the timestamp wraps.
TEST_F(ScreenshareLayerTestVP9, TimestampWrap) {
InitScreenshareLayers(2);
ConfigureBitrateForLayer(100, 0);
expected_.layer[0].upd_buf = 0;
expected_.layer[0].ref_buf1 = 0;
expected_.layer[1].upd_buf = 1;
expected_.layer[1].ref_buf1 = 1;
expected_.start_layer = 0;
expected_.stop_layer = 1;
// Advance time to just before the timestamp wraps.
AdvanceTime(std::numeric_limits<uint32_t>::max() / (kTickFrequency / 1000));
EqualToExpected();
AddKilobitsToLayer(200, 0);
// Wrap
expected_.start_layer = 1;
AdvanceTime(1);
EqualToExpected();
}
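For context on the AdvanceTime() argument above: with the 90 kHz RTP clock used here, the 32-bit timestamp wraps after roughly 2^32 / 90000 ≈ 47722 seconds (about 13.3 hours), i.e. max()/90 milliseconds, which is the expression used in the test. A one-line restatement, illustration only:
// Illustration only: ~47,721,858 ms until the 90 kHz RTP timestamp wraps.
const int64_t kMsUntilRtpTimestampWrap =
    std::numeric_limits<uint32_t>::max() / (kTickFrequency / 1000);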
} // namespace webrtc

View File

@ -22,12 +22,16 @@
'conditions': [
['build_libvpx==1', {
'dependencies': [
'<(libvpx_dir)/libvpx.gyp:libvpx',
'<(libvpx_dir)/libvpx.gyp:libvpx_new',
],
}],
['build_vp9==1', {
'sources': [
'include/vp9.h',
'screenshare_layers.cc',
'screenshare_layers.h',
'vp9_frame_buffer_pool.cc',
'vp9_frame_buffer_pool.h',
'vp9_impl.cc',
'vp9_impl.h',
],

View File

@ -0,0 +1,136 @@
/*
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*
*/
#include "webrtc/modules/video_coding/codecs/vp9/vp9_frame_buffer_pool.h"
#include "vpx/vpx_codec.h"
#include "vpx/vpx_decoder.h"
#include "vpx/vpx_frame_buffer.h"
#include "webrtc/base/checks.h"
#include "webrtc/system_wrappers/include/logging.h"
namespace webrtc {
uint8_t* Vp9FrameBufferPool::Vp9FrameBuffer::GetData() {
return data_.data<uint8_t>();
}
size_t Vp9FrameBufferPool::Vp9FrameBuffer::GetDataSize() const {
return data_.size();
}
void Vp9FrameBufferPool::Vp9FrameBuffer::SetSize(size_t size) {
data_.SetSize(size);
}
bool Vp9FrameBufferPool::InitializeVpxUsePool(
vpx_codec_ctx* vpx_codec_context) {
RTC_DCHECK(vpx_codec_context);
// Tell libvpx to use this pool.
if (vpx_codec_set_frame_buffer_functions(
// In which context to use these callback functions.
vpx_codec_context,
// Called by libvpx when it needs another frame buffer.
&Vp9FrameBufferPool::VpxGetFrameBuffer,
// Called by libvpx when it no longer uses a frame buffer.
&Vp9FrameBufferPool::VpxReleaseFrameBuffer,
// |this| will be passed as |user_priv| to VpxGetFrameBuffer.
this)) {
// Failed to configure libvpx to use Vp9FrameBufferPool.
return false;
}
return true;
}
rtc::scoped_refptr<Vp9FrameBufferPool::Vp9FrameBuffer>
Vp9FrameBufferPool::GetFrameBuffer(size_t min_size) {
RTC_DCHECK_GT(min_size, 0u);
rtc::scoped_refptr<Vp9FrameBuffer> available_buffer = nullptr;
{
rtc::CritScope cs(&buffers_lock_);
// Do we have a buffer we can recycle?
for (const auto& buffer : allocated_buffers_) {
if (buffer->HasOneRef()) {
available_buffer = buffer;
break;
}
}
// Otherwise create one.
if (available_buffer == nullptr) {
available_buffer = new rtc::RefCountedObject<Vp9FrameBuffer>();
allocated_buffers_.push_back(available_buffer);
if (allocated_buffers_.size() > max_num_buffers_) {
LOG(LS_WARNING)
<< allocated_buffers_.size() << " Vp9FrameBuffers have been "
<< "allocated by a Vp9FrameBufferPool (exceeding what is "
<< "considered reasonable, " << max_num_buffers_ << ").";
RTC_NOTREACHED();
}
}
}
available_buffer->SetSize(min_size);
return available_buffer;
}
int Vp9FrameBufferPool::GetNumBuffersInUse() const {
int num_buffers_in_use = 0;
rtc::CritScope cs(&buffers_lock_);
for (const auto& buffer : allocated_buffers_) {
if (!buffer->HasOneRef())
++num_buffers_in_use;
}
return num_buffers_in_use;
}
void Vp9FrameBufferPool::ClearPool() {
rtc::CritScope cs(&buffers_lock_);
allocated_buffers_.clear();
}
// static
int32_t Vp9FrameBufferPool::VpxGetFrameBuffer(void* user_priv,
size_t min_size,
vpx_codec_frame_buffer* fb) {
RTC_DCHECK(user_priv);
RTC_DCHECK(fb);
Vp9FrameBufferPool* pool = static_cast<Vp9FrameBufferPool*>(user_priv);
rtc::scoped_refptr<Vp9FrameBuffer> buffer = pool->GetFrameBuffer(min_size);
fb->data = buffer->GetData();
fb->size = buffer->GetDataSize();
// Store Vp9FrameBuffer* in |priv| for use in VpxReleaseFrameBuffer.
// This also makes vpx_codec_get_frame return images with their |fb_priv| set
// to |buffer| which is important for external reference counting.
// Release from refptr so that the buffer's |ref_count_| remains 1 when
// |buffer| goes out of scope.
fb->priv = static_cast<void*>(buffer.release());
return 0;
}
// static
int32_t Vp9FrameBufferPool::VpxReleaseFrameBuffer(void* user_priv,
vpx_codec_frame_buffer* fb) {
RTC_DCHECK(user_priv);
RTC_DCHECK(fb);
Vp9FrameBuffer* buffer = static_cast<Vp9FrameBuffer*>(fb->priv);
if (buffer != nullptr) {
buffer->Release();
// When libvpx fails to decode and decoding is retried (and fails again),
// libvpx can for some reason try to release the same buffer multiple times.
// Setting |priv| to null protects against calling Release more than once.
fb->priv = nullptr;
}
return 0;
}
} // namespace webrtc

View File

@ -0,0 +1,117 @@
/*
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*
*/
#ifndef WEBRTC_MODULES_VIDEO_CODING_CODECS_VP9_FRAME_BUFFER_POOL_H_
#define WEBRTC_MODULES_VIDEO_CODING_CODECS_VP9_FRAME_BUFFER_POOL_H_
#include <vector>
#include "webrtc/base/basictypes.h"
#include "webrtc/base/buffer.h"
#include "webrtc/base/criticalsection.h"
#include "webrtc/base/refcount.h"
#include "webrtc/base/scoped_ref_ptr.h"
struct vpx_codec_ctx;
struct vpx_codec_frame_buffer;
namespace webrtc {
// This memory pool is used to serve buffers to libvpx for decoding purposes in
// VP9, which is set up in InitializeVpxUsePool. After the initialization, any
// time libvpx wants to decode a frame it will use buffers provided and released
// through VpxGetFrameBuffer and VpxReleaseFrameBuffer.
// The benefit of owning the pool that libvpx relies on for decoding is that the
// decoded frames returned by libvpx (from vpx_codec_get_frame) use parts of our
// buffers for the decoded image data. By retaining ownership of this buffer
// using scoped_refptr, the image buffer can be reused by VideoFrames and no
// frame copy has to occur during decoding and frame delivery.
//
// Pseudo example usage case:
// Vp9FrameBufferPool pool;
// pool.InitializeVpxUsePool(decoder_ctx);
// ...
//
// // During decoding, libvpx will get and release buffers from the pool.
// vpx_codec_decode(decoder_ctx, ...);
//
// vpx_image_t* img = vpx_codec_get_frame(decoder_ctx, &iter);
// // Important to use scoped_refptr to protect it against being recycled by
// // the pool.
// scoped_refptr<Vp9FrameBuffer> img_buffer = (Vp9FrameBuffer*)img->fb_priv;
// ...
//
// // Destroying the codec will make libvpx release any buffers it was using.
// vpx_codec_destroy(decoder_ctx);
class Vp9FrameBufferPool {
public:
class Vp9FrameBuffer : public rtc::RefCountInterface {
public:
uint8_t* GetData();
size_t GetDataSize() const;
void SetSize(size_t size);
virtual bool HasOneRef() const = 0;
private:
// Data as an easily resizable buffer.
rtc::Buffer data_;
};
// Configures libvpx to, in the specified context, use this memory pool for
// buffers used to decompress frames. This is only supported for VP9.
bool InitializeVpxUsePool(vpx_codec_ctx* vpx_codec_context);
// Gets a frame buffer of at least |min_size|, recycling an available one or
// creating a new one. When no longer referenced from the outside the buffer
// becomes recyclable.
rtc::scoped_refptr<Vp9FrameBuffer> GetFrameBuffer(size_t min_size);
// Gets the number of buffers currently in use (not ready to be recycled).
int GetNumBuffersInUse() const;
// Releases allocated buffers, deleting available buffers. Buffers in use are
// not deleted until they are no longer referenced.
void ClearPool();
// InitializeVpxUsePool configures libvpx to call this function when it needs
// a new frame buffer. Parameters:
// |user_priv| Private data passed to libvpx, InitializeVpxUsePool sets it up
// to be a pointer to the pool.
// |min_size| Minimum size needed by libvpx (to decompress a frame).
// |fb| Pointer to the libvpx frame buffer object, this is updated to
// use the pool's buffer.
// Returns 0 on success. Returns < 0 on failure.
static int32_t VpxGetFrameBuffer(void* user_priv,
size_t min_size,
vpx_codec_frame_buffer* fb);
// InitializeVpxUsePool configures libvpx to call this function when it has
// finished using one of the pool's frame buffers. Parameters:
// |user_priv| Private data passed to libvpx, InitializeVpxUsePool sets it up
// to be a pointer to the pool.
// |fb| Pointer to the libvpx frame buffer object, its |priv| will be
// a pointer to one of the pool's Vp9FrameBuffer.
static int32_t VpxReleaseFrameBuffer(void* user_priv,
vpx_codec_frame_buffer* fb);
private:
// Protects |allocated_buffers_|.
mutable rtc::CriticalSection buffers_lock_;
// All buffers, in use or ready to be recycled.
std::vector<rtc::scoped_refptr<Vp9FrameBuffer>> allocated_buffers_
GUARDED_BY(buffers_lock_);
// If more buffers than this are allocated we print warnings and crash in
// debug mode.
static const size_t max_num_buffers_ = 10;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_VIDEO_CODING_CODECS_VP9_FRAME_BUFFER_POOL_H_

View File

@ -21,19 +21,49 @@
#include "vpx/vp8cx.h"
#include "vpx/vp8dx.h"
#include "webrtc/base/bind.h"
#include "webrtc/base/checks.h"
#include "webrtc/base/trace_event.h"
#include "webrtc/common.h"
#include "webrtc/common_video/libyuv/include/webrtc_libyuv.h"
#include "webrtc/modules/interface/module_common_types.h"
#include "webrtc/system_wrappers/interface/tick_util.h"
#include "webrtc/system_wrappers/interface/trace_event.h"
#include "webrtc/modules/include/module_common_types.h"
#include "webrtc/modules/video_coding/codecs/vp9/screenshare_layers.h"
#include "webrtc/system_wrappers/include/logging.h"
#include "webrtc/system_wrappers/include/tick_util.h"
namespace {
// VP9DecoderImpl::ReturnFrame helper function used with WrappedI420Buffer.
static void WrappedI420BufferNoLongerUsedCb(
webrtc::Vp9FrameBufferPool::Vp9FrameBuffer* img_buffer) {
img_buffer->Release();
}
} // anonymous namespace
namespace webrtc {
// Only positive speeds, range for real-time coding currently is: 5 - 8.
// Lower means slower/better quality, higher means faster/lower quality.
int GetCpuSpeed(int width, int height) {
// For smaller resolutions, use lower speed setting (get some coding gain at
// the cost of increased encoding complexity).
if (width * height <= 352 * 288)
return 5;
else
return 7;
}
VP9Encoder* VP9Encoder::Create() {
return new VP9EncoderImpl();
}
void VP9EncoderImpl::EncoderOutputCodedPacketCallback(vpx_codec_cx_pkt* pkt,
void* user_data) {
VP9EncoderImpl* enc = (VP9EncoderImpl*)(user_data);
enc->GetEncodedLayerFrame(pkt);
}
VP9EncoderImpl::VP9EncoderImpl()
: encoded_image_(),
encoded_complete_callback_(NULL),
@ -44,7 +74,15 @@ VP9EncoderImpl::VP9EncoderImpl()
rc_max_intra_target_(0),
encoder_(NULL),
config_(NULL),
raw_(NULL) {
raw_(NULL),
input_image_(NULL),
tl0_pic_idx_(0),
frames_since_kf_(0),
num_temporal_layers_(0),
num_spatial_layers_(0),
frames_encoded_(0),
// Use two spatial layers when screensharing with flexible mode.
spatial_layer_(new ScreenshareLayersVP9(2)) {
memset(&codec_, 0, sizeof(codec_));
uint32_t seed = static_cast<uint32_t>(TickTime::MillisecondTimestamp());
srand(seed);
@ -78,6 +116,85 @@ int VP9EncoderImpl::Release() {
return WEBRTC_VIDEO_CODEC_OK;
}
bool VP9EncoderImpl::ExplicitlyConfiguredSpatialLayers() const {
// We check target_bitrate_bps of the 0th layer to see if the spatial layers
// (i.e. bitrates) were explicitly configured.
return num_spatial_layers_ > 1 &&
codec_.spatialLayers[0].target_bitrate_bps > 0;
}
bool VP9EncoderImpl::SetSvcRates() {
uint8_t i = 0;
if (ExplicitlyConfiguredSpatialLayers()) {
if (num_temporal_layers_ > 1) {
LOG(LS_ERROR) << "Multiple temporal layers when manually specifying "
"spatial layers not implemented yet!";
return false;
}
int total_bitrate_bps = 0;
for (i = 0; i < num_spatial_layers_; ++i)
total_bitrate_bps += codec_.spatialLayers[i].target_bitrate_bps;
// If total bitrate differs now from what has been specified at the
// beginning, update the bitrates in the same ratio as before.
for (i = 0; i < num_spatial_layers_; ++i) {
config_->ss_target_bitrate[i] = config_->layer_target_bitrate[i] =
static_cast<int>(static_cast<int64_t>(config_->rc_target_bitrate) *
codec_.spatialLayers[i].target_bitrate_bps /
total_bitrate_bps);
}
} else {
float rate_ratio[VPX_MAX_LAYERS] = {0};
float total = 0;
for (i = 0; i < num_spatial_layers_; ++i) {
if (svc_internal_.svc_params.scaling_factor_num[i] <= 0 ||
svc_internal_.svc_params.scaling_factor_den[i] <= 0) {
LOG(LS_ERROR) << "Scaling factors not specified!";
return false;
}
rate_ratio[i] =
static_cast<float>(svc_internal_.svc_params.scaling_factor_num[i]) /
svc_internal_.svc_params.scaling_factor_den[i];
total += rate_ratio[i];
}
for (i = 0; i < num_spatial_layers_; ++i) {
config_->ss_target_bitrate[i] = static_cast<unsigned int>(
config_->rc_target_bitrate * rate_ratio[i] / total);
if (num_temporal_layers_ == 1) {
config_->layer_target_bitrate[i] = config_->ss_target_bitrate[i];
} else if (num_temporal_layers_ == 2) {
config_->layer_target_bitrate[i * num_temporal_layers_] =
config_->ss_target_bitrate[i] * 2 / 3;
config_->layer_target_bitrate[i * num_temporal_layers_ + 1] =
config_->ss_target_bitrate[i];
} else if (num_temporal_layers_ == 3) {
config_->layer_target_bitrate[i * num_temporal_layers_] =
config_->ss_target_bitrate[i] / 2;
config_->layer_target_bitrate[i * num_temporal_layers_ + 1] =
config_->layer_target_bitrate[i * num_temporal_layers_] +
(config_->ss_target_bitrate[i] / 4);
config_->layer_target_bitrate[i * num_temporal_layers_ + 2] =
config_->ss_target_bitrate[i];
} else {
LOG(LS_ERROR) << "Unsupported number of temporal layers: "
<< num_temporal_layers_;
return false;
}
}
}
// For now, temporal layers are only supported when there is one spatial layer.
if (num_spatial_layers_ == 1) {
for (i = 0; i < num_temporal_layers_; ++i) {
config_->ts_target_bitrate[i] = config_->layer_target_bitrate[i];
}
}
return true;
}
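A worked example of the split above, with illustrative numbers that are not from the patch: for a single spatial layer with ss_target_bitrate = 1200 kbps, the cumulative per-temporal-layer targets come out as follows.
// Illustration only:
//   2 temporal layers: layer_target_bitrate = {800, 1200}        (2/3 of total, then total)
//   3 temporal layers: layer_target_bitrate = {600, 900, 1200}   (1/2, 1/2 + 1/4, then total)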
int VP9EncoderImpl::SetRates(uint32_t new_bitrate_kbit,
uint32_t new_framerate) {
if (!inited_) {
@ -95,6 +212,12 @@ int VP9EncoderImpl::SetRates(uint32_t new_bitrate_kbit,
}
config_->rc_target_bitrate = new_bitrate_kbit;
codec_.maxFramerate = new_framerate;
spatial_layer_->ConfigureBitrate(new_bitrate_kbit, 0);
if (!SetSvcRates()) {
return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
}
// Update encoder context
if (vpx_codec_enc_config_set(encoder_, config_)) {
return WEBRTC_VIDEO_CODEC_ERROR;
@ -121,6 +244,14 @@ int VP9EncoderImpl::InitEncode(const VideoCodec* inst,
if (number_of_cores < 1) {
return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
}
if (inst->codecSpecific.VP9.numberOfTemporalLayers > 3) {
return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
}
// libvpx currently supports only one or two spatial layers.
if (inst->codecSpecific.VP9.numberOfSpatialLayers > 2) {
return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
}
int retVal = Release();
if (retVal < 0) {
return retVal;
@ -135,6 +266,12 @@ int VP9EncoderImpl::InitEncode(const VideoCodec* inst,
if (&codec_ != inst) {
codec_ = *inst;
}
num_spatial_layers_ = inst->codecSpecific.VP9.numberOfSpatialLayers;
num_temporal_layers_ = inst->codecSpecific.VP9.numberOfTemporalLayers;
if (num_temporal_layers_ == 0)
num_temporal_layers_ = 1;
// Random start 16 bits is enough.
picture_id_ = static_cast<uint16_t>(rand()) & 0x7FFF;
// Allocate memory for encoded image
@ -182,11 +319,57 @@ int VP9EncoderImpl::InitEncode(const VideoCodec* inst,
} else {
config_->kf_mode = VPX_KF_DISABLED;
}
config_->rc_resize_allowed = inst->codecSpecific.VP9.automaticResizeOn ?
1 : 0;
// Determine number of threads based on the image size and #cores.
config_->g_threads = NumberOfThreads(config_->g_w,
config_->g_h,
number_of_cores);
cpu_speed_ = GetCpuSpeed(config_->g_w, config_->g_h);
// TODO(asapersson): Check configuration of temporal switch up and increase
// pattern length.
is_flexible_mode_ = inst->codecSpecific.VP9.flexibleMode;
if (is_flexible_mode_) {
config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS;
config_->ts_number_layers = num_temporal_layers_;
if (codec_.mode == kScreensharing)
spatial_layer_->ConfigureBitrate(inst->startBitrate, 0);
} else if (num_temporal_layers_ == 1) {
gof_.SetGofInfoVP9(kTemporalStructureMode1);
config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING;
config_->ts_number_layers = 1;
config_->ts_rate_decimator[0] = 1;
config_->ts_periodicity = 1;
config_->ts_layer_id[0] = 0;
} else if (num_temporal_layers_ == 2) {
gof_.SetGofInfoVP9(kTemporalStructureMode2);
config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_0101;
config_->ts_number_layers = 2;
config_->ts_rate_decimator[0] = 2;
config_->ts_rate_decimator[1] = 1;
config_->ts_periodicity = 2;
config_->ts_layer_id[0] = 0;
config_->ts_layer_id[1] = 1;
} else if (num_temporal_layers_ == 3) {
gof_.SetGofInfoVP9(kTemporalStructureMode3);
config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_0212;
config_->ts_number_layers = 3;
config_->ts_rate_decimator[0] = 4;
config_->ts_rate_decimator[1] = 2;
config_->ts_rate_decimator[2] = 1;
config_->ts_periodicity = 4;
config_->ts_layer_id[0] = 0;
config_->ts_layer_id[1] = 2;
config_->ts_layer_id[2] = 1;
config_->ts_layer_id[3] = 2;
} else {
return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
}
tl0_pic_idx_ = static_cast<uint8_t>(rand());
return InitAndSetControlSettings(inst);
}
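The three-layer branch above mirrors kTemporalStructureMode3: the 0-2-1-2 ts_layer_id pattern with ts_rate_decimator = {4, 2, 1} means that, for an illustrative 30 fps input, decoding only TL0 gives 7.5 fps, TL0+TL1 gives 15 fps, and all three layers give the full 30 fps.
// Illustration only: frame rate per decoded temporal layer at a 30 fps input.
//   TL0 only       : 30 / ts_rate_decimator[0] = 30 / 4 = 7.5 fps
//   TL0 + TL1      : 30 / ts_rate_decimator[1] = 30 / 2 = 15  fps
//   TL0 + TL1 + TL2: 30 / ts_rate_decimator[2] = 30 / 1 = 30  fps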
@ -206,30 +389,71 @@ int VP9EncoderImpl::NumberOfThreads(int width,
}
int VP9EncoderImpl::InitAndSetControlSettings(const VideoCodec* inst) {
config_->ss_number_layers = num_spatial_layers_;
if (ExplicitlyConfiguredSpatialLayers()) {
for (int i = 0; i < num_spatial_layers_; ++i) {
const auto& layer = codec_.spatialLayers[i];
svc_internal_.svc_params.max_quantizers[i] = config_->rc_max_quantizer;
svc_internal_.svc_params.min_quantizers[i] = config_->rc_min_quantizer;
svc_internal_.svc_params.scaling_factor_num[i] = layer.scaling_factor_num;
svc_internal_.svc_params.scaling_factor_den[i] = layer.scaling_factor_den;
}
} else {
int scaling_factor_num = 256;
for (int i = num_spatial_layers_ - 1; i >= 0; --i) {
svc_internal_.svc_params.max_quantizers[i] = config_->rc_max_quantizer;
svc_internal_.svc_params.min_quantizers[i] = config_->rc_min_quantizer;
// 1:2 scaling in each dimension.
svc_internal_.svc_params.scaling_factor_num[i] = scaling_factor_num;
svc_internal_.svc_params.scaling_factor_den[i] = 256;
if (codec_.mode != kScreensharing)
scaling_factor_num /= 2;
}
}
if (!SetSvcRates()) {
return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
}
if (vpx_codec_enc_init(encoder_, vpx_codec_vp9_cx(), config_, 0)) {
return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
}
// Only positive speeds, currently: 0 - 8.
// O means slowest/best quality, 8 means fastest/lower quality.
cpu_speed_ = 7;
// Note: some of these codec controls still use "VP8" in the control name.
// TODO(marpan): Update this in the next/future libvpx version.
vpx_codec_control(encoder_, VP8E_SET_CPUUSED, cpu_speed_);
vpx_codec_control(encoder_, VP8E_SET_MAX_INTRA_BITRATE_PCT,
rc_max_intra_target_);
vpx_codec_control(encoder_, VP9E_SET_AQ_MODE,
inst->codecSpecific.VP9.adaptiveQpMode ? 3 : 0);
vpx_codec_control(
encoder_, VP9E_SET_SVC,
(num_temporal_layers_ > 1 || num_spatial_layers_ > 1) ? 1 : 0);
if (num_temporal_layers_ > 1 || num_spatial_layers_ > 1) {
vpx_codec_control(encoder_, VP9E_SET_SVC_PARAMETERS,
&svc_internal_.svc_params);
}
// Register callback for getting each spatial layer.
vpx_codec_priv_output_cx_pkt_cb_pair_t cbp = {
VP9EncoderImpl::EncoderOutputCodedPacketCallback, (void*)(this)};
vpx_codec_control(encoder_, VP9E_REGISTER_CX_CALLBACK, (void*)(&cbp));
// Control function to set the number of column tiles in encoding a frame, in
// log2 unit: e.g., 0 = 1 tile column, 1 = 2 tile columns, 2 = 4 tile columns.
// The number of tile columns will be capped by the encoder based on image size
// (minimum width of tile column is 256 pixels, maximum is 4096).
vpx_codec_control(encoder_, VP9E_SET_TILE_COLUMNS, (config_->g_threads >> 1));
#if !defined(WEBRTC_ARCH_ARM)
#if !defined(WEBRTC_ARCH_ARM) && !defined(WEBRTC_ARCH_ARM64)
// Note denoiser is still off by default until further testing/optimization,
// i.e., codecSpecific.VP9.denoisingOn == 0.
vpx_codec_control(encoder_, VP9E_SET_NOISE_SENSITIVITY,
inst->codecSpecific.VP9.denoisingOn ? 1 : 0);
#endif
if (codec_.mode == kScreensharing) {
// Adjust internal parameters to screen content.
vpx_codec_control(encoder_, VP9E_SET_TUNE_CONTENT, 1);
}
// Enable encoder skip of static/low content blocks.
vpx_codec_control(encoder_, VP8E_SET_STATIC_THRESHOLD, 1);
inited_ = true;
return WEBRTC_VIDEO_CODEC_OK;
}
@ -249,9 +473,9 @@ uint32_t VP9EncoderImpl::MaxIntraTarget(uint32_t optimal_buffer_size) {
return (target_pct < min_intra_size) ? min_intra_size: target_pct;
}
int VP9EncoderImpl::Encode(const I420VideoFrame& input_image,
int VP9EncoderImpl::Encode(const VideoFrame& input_image,
const CodecSpecificInfo* codec_specific_info,
const std::vector<VideoFrameType>* frame_types) {
const std::vector<FrameType>* frame_types) {
if (!inited_) {
return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
}
@ -261,13 +485,20 @@ int VP9EncoderImpl::Encode(const I420VideoFrame& input_image,
if (encoded_complete_callback_ == NULL) {
return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
}
VideoFrameType frame_type = kDeltaFrame;
FrameType frame_type = kVideoFrameDelta;
// We only support one stream at the moment.
if (frame_types && frame_types->size() > 0) {
frame_type = (*frame_types)[0];
}
DCHECK_EQ(input_image.width(), static_cast<int>(raw_->d_w));
DCHECK_EQ(input_image.height(), static_cast<int>(raw_->d_h));
RTC_DCHECK_EQ(input_image.width(), static_cast<int>(raw_->d_w));
RTC_DCHECK_EQ(input_image.height(), static_cast<int>(raw_->d_h));
// Store a pointer to the input image, for use in the callback.
// Only some information from input_image is needed there (such as the
// timestamp), so saving just that instead of the pointer would also work.
input_image_ = &input_image;
// Image in vpx_image_t format.
// Input image is const. VPX's raw image is not defined as const.
raw_->planes[VPX_PLANE_Y] = const_cast<uint8_t*>(input_image.buffer(kYPlane));
@ -277,12 +508,35 @@ int VP9EncoderImpl::Encode(const I420VideoFrame& input_image,
raw_->stride[VPX_PLANE_U] = input_image.stride(kUPlane);
raw_->stride[VPX_PLANE_V] = input_image.stride(kVPlane);
int flags = 0;
bool send_keyframe = (frame_type == kKeyFrame);
vpx_enc_frame_flags_t flags = 0;
bool send_keyframe = (frame_type == kVideoFrameKey);
if (send_keyframe) {
// Key frame request from caller.
flags = VPX_EFLAG_FORCE_KF;
}
if (is_flexible_mode_) {
SuperFrameRefSettings settings;
// These structs are copied when calling vpx_codec_control,
// therefore it is ok for them to go out of scope.
vpx_svc_ref_frame_config enc_layer_conf;
vpx_svc_layer_id layer_id;
if (codec_.mode == kRealtimeVideo) {
// Real time video not yet implemented in flexible mode.
RTC_NOTREACHED();
} else {
settings = spatial_layer_->GetSuperFrameSettings(input_image.timestamp(),
send_keyframe);
}
enc_layer_conf = GenerateRefsAndFlags(settings);
layer_id.temporal_layer_id = 0;
layer_id.spatial_layer_id = settings.start_layer;
vpx_codec_control(encoder_, VP9E_SET_SVC_LAYER_ID, &layer_id);
vpx_codec_control(encoder_, VP9E_SET_SVC_REF_FRAME_CONFIG, &enc_layer_conf);
}
assert(codec_.maxFramerate > 0);
uint32_t duration = 90000 / codec_.maxFramerate;
if (vpx_codec_encode(encoder_, raw_, timestamp_, duration, flags,
@ -290,7 +544,8 @@ int VP9EncoderImpl::Encode(const I420VideoFrame& input_image,
return WEBRTC_VIDEO_CODEC_ERROR;
}
timestamp_ += duration;
return GetEncodedPartitions(input_image);
return WEBRTC_VIDEO_CODEC_OK;
}
void VP9EncoderImpl::PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
@ -299,70 +554,253 @@ void VP9EncoderImpl::PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
assert(codec_specific != NULL);
codec_specific->codecType = kVideoCodecVP9;
CodecSpecificInfoVP9 *vp9_info = &(codec_specific->codecSpecific.VP9);
vp9_info->pictureId = picture_id_;
vp9_info->keyIdx = kNoKeyIdx;
vp9_info->nonReference = (pkt.data.frame.flags & VPX_FRAME_IS_DROPPABLE) != 0;
// TODO(marpan): Temporal layers are supported in the current VP9 version,
// but for now use 1 temporal layer encoding. Will update this when temporal
// layer support for VP9 is added in webrtc.
vp9_info->temporalIdx = kNoTemporalIdx;
vp9_info->layerSync = false;
vp9_info->tl0PicIdx = kNoTl0PicIdx;
picture_id_ = (picture_id_ + 1) & 0x7FFF;
// TODO(asapersson): Set correct values.
vp9_info->inter_pic_predicted =
(pkt.data.frame.flags & VPX_FRAME_IS_KEY) ? false : true;
vp9_info->flexible_mode = codec_.codecSpecific.VP9.flexibleMode;
vp9_info->ss_data_available = ((pkt.data.frame.flags & VPX_FRAME_IS_KEY) &&
!codec_.codecSpecific.VP9.flexibleMode)
? true
: false;
vpx_svc_layer_id_t layer_id = {0};
vpx_codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id);
assert(num_temporal_layers_ > 0);
assert(num_spatial_layers_ > 0);
if (num_temporal_layers_ == 1) {
assert(layer_id.temporal_layer_id == 0);
vp9_info->temporal_idx = kNoTemporalIdx;
} else {
vp9_info->temporal_idx = layer_id.temporal_layer_id;
}
if (num_spatial_layers_ == 1) {
assert(layer_id.spatial_layer_id == 0);
vp9_info->spatial_idx = kNoSpatialIdx;
} else {
vp9_info->spatial_idx = layer_id.spatial_layer_id;
}
if (layer_id.spatial_layer_id != 0) {
vp9_info->ss_data_available = false;
}
// TODO(asapersson): this info has to be obtained from the encoder.
vp9_info->temporal_up_switch = true;
bool is_first_frame = false;
if (is_flexible_mode_) {
is_first_frame =
layer_id.spatial_layer_id == spatial_layer_->GetStartLayer();
} else {
is_first_frame = layer_id.spatial_layer_id == 0;
}
if (is_first_frame) {
picture_id_ = (picture_id_ + 1) & 0x7FFF;
// TODO(asapersson): this info has to be obtained from the encoder.
vp9_info->inter_layer_predicted = false;
++frames_since_kf_;
} else {
// TODO(asapersson): this info has to be obtained from the encoder.
vp9_info->inter_layer_predicted = true;
}
if (pkt.data.frame.flags & VPX_FRAME_IS_KEY) {
frames_since_kf_ = 0;
}
vp9_info->picture_id = picture_id_;
if (!vp9_info->flexible_mode) {
if (layer_id.temporal_layer_id == 0 && layer_id.spatial_layer_id == 0) {
tl0_pic_idx_++;
}
vp9_info->tl0_pic_idx = tl0_pic_idx_;
}
// Always populate this, so that the packetizer can properly set the marker
// bit.
vp9_info->num_spatial_layers = num_spatial_layers_;
vp9_info->num_ref_pics = 0;
if (vp9_info->flexible_mode) {
vp9_info->gof_idx = kNoGofIdx;
vp9_info->num_ref_pics = num_ref_pics_[layer_id.spatial_layer_id];
for (int i = 0; i < num_ref_pics_[layer_id.spatial_layer_id]; ++i) {
vp9_info->p_diff[i] = p_diff_[layer_id.spatial_layer_id][i];
}
} else {
vp9_info->gof_idx =
static_cast<uint8_t>(frames_since_kf_ % gof_.num_frames_in_gof);
vp9_info->temporal_up_switch = gof_.temporal_up_switch[vp9_info->gof_idx];
}
if (vp9_info->ss_data_available) {
vp9_info->spatial_layer_resolution_present = true;
for (size_t i = 0; i < vp9_info->num_spatial_layers; ++i) {
vp9_info->width[i] = codec_.width *
svc_internal_.svc_params.scaling_factor_num[i] /
svc_internal_.svc_params.scaling_factor_den[i];
vp9_info->height[i] = codec_.height *
svc_internal_.svc_params.scaling_factor_num[i] /
svc_internal_.svc_params.scaling_factor_den[i];
}
if (!vp9_info->flexible_mode) {
vp9_info->gof.CopyGofInfoVP9(gof_);
}
}
}
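In non-flexible mode the gof_idx computed above simply cycles through the GOF, so the receiver can look up the temporal information from the SS data sent with the key frame; for example (illustrative), with kTemporalStructureMode3 and its four-frame GOF:
// Illustration only: gof_idx cycling after a key frame (num_frames_in_gof == 4).
//   frames_since_kf_: 0 1 2 3 4 5 6 7 ...
//   gof_idx         : 0 1 2 3 0 1 2 3 ...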
int VP9EncoderImpl::GetEncodedPartitions(const I420VideoFrame& input_image) {
vpx_codec_iter_t iter = NULL;
int VP9EncoderImpl::GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt) {
encoded_image_._length = 0;
encoded_image_._frameType = kDeltaFrame;
encoded_image_._frameType = kVideoFrameDelta;
RTPFragmentationHeader frag_info;
// Note: no data partitioning in VP9, so 1 partition only. We keep this
// fragmentation data for now, until VP9 packetizer is implemented.
frag_info.VerifyAndAllocateFragmentationHeader(1);
int part_idx = 0;
CodecSpecificInfo codec_specific;
const vpx_codec_cx_pkt_t *pkt = NULL;
while ((pkt = vpx_codec_get_cx_data(encoder_, &iter)) != NULL) {
switch (pkt->kind) {
case VPX_CODEC_CX_FRAME_PKT: {
memcpy(&encoded_image_._buffer[encoded_image_._length],
pkt->data.frame.buf,
pkt->data.frame.sz);
frag_info.fragmentationOffset[part_idx] = encoded_image_._length;
frag_info.fragmentationLength[part_idx] =
static_cast<uint32_t>(pkt->data.frame.sz);
frag_info.fragmentationPlType[part_idx] = 0;
frag_info.fragmentationTimeDiff[part_idx] = 0;
encoded_image_._length += static_cast<uint32_t>(pkt->data.frame.sz);
assert(encoded_image_._length <= encoded_image_._size);
break;
}
default: {
break;
}
}
// End of frame.
if ((pkt->data.frame.flags & VPX_FRAME_IS_FRAGMENT) == 0) {
// Check if encoded frame is a key frame.
if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) {
encoded_image_._frameType = kKeyFrame;
}
PopulateCodecSpecific(&codec_specific, *pkt, input_image.timestamp());
break;
}
assert(pkt->kind == VPX_CODEC_CX_FRAME_PKT);
memcpy(&encoded_image_._buffer[encoded_image_._length], pkt->data.frame.buf,
pkt->data.frame.sz);
frag_info.fragmentationOffset[part_idx] = encoded_image_._length;
frag_info.fragmentationLength[part_idx] =
static_cast<uint32_t>(pkt->data.frame.sz);
frag_info.fragmentationPlType[part_idx] = 0;
frag_info.fragmentationTimeDiff[part_idx] = 0;
encoded_image_._length += static_cast<uint32_t>(pkt->data.frame.sz);
vpx_svc_layer_id_t layer_id = {0};
vpx_codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id);
if (is_flexible_mode_ && codec_.mode == kScreensharing)
spatial_layer_->LayerFrameEncoded(
static_cast<unsigned int>(encoded_image_._length),
layer_id.spatial_layer_id);
assert(encoded_image_._length <= encoded_image_._size);
// End of frame.
// Check if encoded frame is a key frame.
if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) {
encoded_image_._frameType = kVideoFrameKey;
}
PopulateCodecSpecific(&codec_specific, *pkt, input_image_->timestamp());
if (encoded_image_._length > 0) {
TRACE_COUNTER1("webrtc", "EncodedFrameSize", encoded_image_._length);
encoded_image_._timeStamp = input_image.timestamp();
encoded_image_.capture_time_ms_ = input_image.render_time_ms();
encoded_image_._timeStamp = input_image_->timestamp();
encoded_image_.capture_time_ms_ = input_image_->render_time_ms();
encoded_image_._encodedHeight = raw_->d_h;
encoded_image_._encodedWidth = raw_->d_w;
encoded_complete_callback_->Encoded(encoded_image_, &codec_specific,
&frag_info);
&frag_info);
}
return WEBRTC_VIDEO_CODEC_OK;
}
vpx_svc_ref_frame_config VP9EncoderImpl::GenerateRefsAndFlags(
const SuperFrameRefSettings& settings) {
static const vpx_enc_frame_flags_t kAllFlags =
VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_LAST |
VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_GF;
vpx_svc_ref_frame_config sf_conf = {};
if (settings.is_keyframe) {
// Used later on to make sure we don't make any invalid references.
memset(buffer_updated_at_frame_, -1, sizeof(buffer_updated_at_frame_));
for (int layer = settings.start_layer; layer <= settings.stop_layer;
++layer) {
num_ref_pics_[layer] = 0;
buffer_updated_at_frame_[settings.layer[layer].upd_buf] = frames_encoded_;
// When encoding a keyframe only the alt_fb_idx is used
// to specify which layer ends up in which buffer.
sf_conf.alt_fb_idx[layer] = settings.layer[layer].upd_buf;
}
} else {
for (int layer_idx = settings.start_layer; layer_idx <= settings.stop_layer;
++layer_idx) {
vpx_enc_frame_flags_t layer_flags = kAllFlags;
num_ref_pics_[layer_idx] = 0;
int8_t refs[3] = {settings.layer[layer_idx].ref_buf1,
settings.layer[layer_idx].ref_buf2,
settings.layer[layer_idx].ref_buf3};
for (unsigned int ref_idx = 0; ref_idx < kMaxVp9RefPics; ++ref_idx) {
if (refs[ref_idx] == -1)
continue;
RTC_DCHECK_GE(refs[ref_idx], 0);
RTC_DCHECK_LE(refs[ref_idx], 7);
// Easier to remove flags from all flags rather than having to
// build the flags from 0.
switch (num_ref_pics_[layer_idx]) {
case 0: {
sf_conf.lst_fb_idx[layer_idx] = refs[ref_idx];
layer_flags &= ~VP8_EFLAG_NO_REF_LAST;
break;
}
case 1: {
sf_conf.gld_fb_idx[layer_idx] = refs[ref_idx];
layer_flags &= ~VP8_EFLAG_NO_REF_GF;
break;
}
case 2: {
sf_conf.alt_fb_idx[layer_idx] = refs[ref_idx];
layer_flags &= ~VP8_EFLAG_NO_REF_ARF;
break;
}
}
// Make sure we don't reference a buffer that hasn't been
// used at all or hasn't been used since a keyframe.
RTC_DCHECK_NE(buffer_updated_at_frame_[refs[ref_idx]], -1);
p_diff_[layer_idx][num_ref_pics_[layer_idx]] =
frames_encoded_ - buffer_updated_at_frame_[refs[ref_idx]];
num_ref_pics_[layer_idx]++;
}
bool upd_buf_same_as_a_ref = false;
if (settings.layer[layer_idx].upd_buf != -1) {
for (unsigned int ref_idx = 0; ref_idx < kMaxVp9RefPics; ++ref_idx) {
if (settings.layer[layer_idx].upd_buf == refs[ref_idx]) {
switch (ref_idx) {
case 0: {
layer_flags &= ~VP8_EFLAG_NO_UPD_LAST;
break;
}
case 1: {
layer_flags &= ~VP8_EFLAG_NO_UPD_GF;
break;
}
case 2: {
layer_flags &= ~VP8_EFLAG_NO_UPD_ARF;
break;
}
}
upd_buf_same_as_a_ref = true;
break;
}
}
if (!upd_buf_same_as_a_ref) {
// If we have three references and a buffer is specified to be
// updated, then that buffer must be the same as one of the
// three references.
RTC_CHECK_LT(num_ref_pics_[layer_idx], kMaxVp9RefPics);
sf_conf.alt_fb_idx[layer_idx] = settings.layer[layer_idx].upd_buf;
layer_flags ^= VP8_EFLAG_NO_UPD_ARF;
}
int updated_buffer = settings.layer[layer_idx].upd_buf;
buffer_updated_at_frame_[updated_buffer] = frames_encoded_;
sf_conf.frame_flags[layer_idx] = layer_flags;
}
}
}
++frames_encoded_;
return sf_conf;
}
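A worked example of the bookkeeping above, with a single spatial layer and illustrative frame numbers: the key frame (frames_encoded_ == 0) updates buffer 0, so buffer_updated_at_frame_[0] becomes 0; the next frame references buffer 0 and records p_diff = 1 - 0 = 1 before updating the buffer again; a later frame at frames_encoded_ == 5 referencing a buffer last updated at frame 3 records p_diff = 5 - 3 = 2. These are the values PopulateCodecSpecific() copies into vp9_info->p_diff for the packetizer.
// Illustration only: p_diff = frames_encoded_ - buffer_updated_at_frame_[ref].
//   frame 0 (key)  : updates buf 0                   -> buffer_updated_at_frame_[0] = 0
//   frame 1 (delta): refs buf 0, p_diff = 1 - 0 = 1  -> buffer_updated_at_frame_[0] = 1
//   frame 5 (delta): refs a buf last updated at 3, p_diff = 5 - 3 = 2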
int VP9EncoderImpl::SetChannelParameters(uint32_t packet_loss, int64_t rtt) {
return WEBRTC_VIDEO_CODEC_OK;
}
@ -388,6 +826,14 @@ VP9DecoderImpl::VP9DecoderImpl()
VP9DecoderImpl::~VP9DecoderImpl() {
inited_ = true; // in order to do the actual release
Release();
int num_buffers_in_use = frame_buffer_pool_.GetNumBuffersInUse();
if (num_buffers_in_use > 0) {
// The frame buffers are reference counted and frames are exposed after
// decoding. There may be valid usage cases where previous frames are still
// referenced after ~VP9DecoderImpl; that is not a leak.
LOG(LS_INFO) << num_buffers_in_use << " Vp9FrameBuffers are still "
<< "referenced during ~VP9DecoderImpl.";
}
}
int VP9DecoderImpl::Reset() {
@ -421,6 +867,11 @@ int VP9DecoderImpl::InitDecode(const VideoCodec* inst, int number_of_cores) {
// Save VideoCodec instance for later; mainly for duplicating the decoder.
codec_ = *inst;
}
if (!frame_buffer_pool_.InitializeVpxUsePool(decoder_)) {
return WEBRTC_VIDEO_CODEC_MEMORY;
}
inited_ = true;
// Always start with a complete key frame.
key_frame_required_ = true;
@ -440,7 +891,7 @@ int VP9DecoderImpl::Decode(const EncodedImage& input_image,
}
// Always start with a complete key frame.
if (key_frame_required_) {
if (input_image._frameType != kKeyFrame)
if (input_image._frameType != kVideoFrameKey)
return WEBRTC_VIDEO_CODEC_ERROR;
// We have a key frame - is it complete?
if (input_image._completeFrame) {
@ -455,6 +906,8 @@ int VP9DecoderImpl::Decode(const EncodedImage& input_image,
if (input_image._length == 0) {
buffer = NULL; // Triggers full frame concealment.
}
// During decode libvpx may get and release buffers from |frame_buffer_pool_|.
// In practice libvpx keeps a few (~3-4) buffers alive at a time.
if (vpx_codec_decode(decoder_,
buffer,
static_cast<unsigned int>(input_image._length),
@ -462,6 +915,9 @@ int VP9DecoderImpl::Decode(const EncodedImage& input_image,
VPX_DL_REALTIME)) {
return WEBRTC_VIDEO_CODEC_ERROR;
}
// |img->fb_priv| contains the image data, a reference counted Vp9FrameBuffer.
// It may be released by libvpx during future vpx_codec_decode or
// vpx_codec_destroy calls.
img = vpx_codec_get_frame(decoder_, &iter);
int ret = ReturnFrame(img, input_image._timeStamp);
if (ret != 0) {
@ -475,15 +931,31 @@ int VP9DecoderImpl::ReturnFrame(const vpx_image_t* img, uint32_t timestamp) {
// Decoder OK and NULL image => No show frame.
return WEBRTC_VIDEO_CODEC_NO_OUTPUT;
}
decoded_image_.CreateFrame(img->planes[VPX_PLANE_Y],
img->planes[VPX_PLANE_U],
img->planes[VPX_PLANE_V],
img->d_w, img->d_h,
img->stride[VPX_PLANE_Y],
img->stride[VPX_PLANE_U],
img->stride[VPX_PLANE_V]);
decoded_image_.set_timestamp(timestamp);
int ret = decode_complete_callback_->Decoded(decoded_image_);
// This buffer contains all of |img|'s image data, a reference counted
// Vp9FrameBuffer. Performing AddRef/Release ensures it is not released and
// recycled during use (libvpx is done with the buffers after a few
// vpx_codec_decode calls or vpx_codec_destroy).
Vp9FrameBufferPool::Vp9FrameBuffer* img_buffer =
static_cast<Vp9FrameBufferPool::Vp9FrameBuffer*>(img->fb_priv);
img_buffer->AddRef();
// The buffer can be used directly by the VideoFrame (without copy) by
// using a WrappedI420Buffer.
rtc::scoped_refptr<WrappedI420Buffer> img_wrapped_buffer(
new rtc::RefCountedObject<webrtc::WrappedI420Buffer>(
img->d_w, img->d_h,
img->planes[VPX_PLANE_Y], img->stride[VPX_PLANE_Y],
img->planes[VPX_PLANE_U], img->stride[VPX_PLANE_U],
img->planes[VPX_PLANE_V], img->stride[VPX_PLANE_V],
// WrappedI420Buffer's mechanism for allowing the release of its frame
// buffer is through a callback function. This is where we should
// release |img_buffer|.
rtc::Bind(&WrappedI420BufferNoLongerUsedCb, img_buffer)));
VideoFrame decoded_image;
decoded_image.set_video_frame_buffer(img_wrapped_buffer);
decoded_image.set_timestamp(timestamp);
int ret = decode_complete_callback_->Decoded(decoded_image);
if (ret != 0)
return ret;
return WEBRTC_VIDEO_CODEC_OK;
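The rtc::Bind() call above references WrappedI420BufferNoLongerUsedCb, whose definition sits elsewhere in vp9_impl.cc and is not shown in this hunk. Presumably it simply balances the AddRef() taken before wrapping, along these lines (sketch):

// Sketch of the callback bound above: drop the reference taken before the
// frame was wrapped, so the Vp9FrameBuffer can return to the pool once both
// libvpx and the renderer are done with it.
static void WrappedI420BufferNoLongerUsedCb(
    Vp9FrameBufferPool::Vp9FrameBuffer* img_buffer) {
  img_buffer->Release();
}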
@ -497,12 +969,18 @@ int VP9DecoderImpl::RegisterDecodeCompleteCallback(
int VP9DecoderImpl::Release() {
if (decoder_ != NULL) {
// When a codec is destroyed libvpx will release any buffers of
// |frame_buffer_pool_| it is currently using.
if (vpx_codec_destroy(decoder_)) {
return WEBRTC_VIDEO_CODEC_MEMORY;
}
delete decoder_;
decoder_ = NULL;
}
// Releases buffers from the pool. Any buffers not in use are deleted
// immediately. Buffers still referenced externally are deleted once fully
// released; they are not returned to the pool.
frame_buffer_pool_.ClearPool();
inited_ = false;
return WEBRTC_VIDEO_CODEC_OK;
}

View File

@ -13,12 +13,16 @@
#define WEBRTC_MODULES_VIDEO_CODING_CODECS_VP9_IMPL_H_
#include "webrtc/modules/video_coding/codecs/vp9/include/vp9.h"
#include "webrtc/modules/video_coding/codecs/vp9/vp9_frame_buffer_pool.h"
#include "vpx/svc_context.h"
#include "vpx/vpx_decoder.h"
#include "vpx/vpx_encoder.h"
namespace webrtc {
class ScreenshareLayersVP9;
class VP9EncoderImpl : public VP9Encoder {
public:
VP9EncoderImpl();
@ -31,9 +35,9 @@ class VP9EncoderImpl : public VP9Encoder {
int number_of_cores,
size_t max_payload_size) override;
int Encode(const I420VideoFrame& input_image,
int Encode(const VideoFrame& input_image,
const CodecSpecificInfo* codec_specific_info,
const std::vector<VideoFrameType>* frame_types) override;
const std::vector<FrameType>* frame_types) override;
int RegisterEncodeCompleteCallback(EncodedImageCallback* callback) override;
@ -41,6 +45,22 @@ class VP9EncoderImpl : public VP9Encoder {
int SetRates(uint32_t new_bitrate_kbit, uint32_t frame_rate) override;
void OnDroppedFrame() override {}
struct LayerFrameRefSettings {
int8_t upd_buf = -1; // -1 - no update, 0..7 - update buffer 0..7
int8_t ref_buf1 = -1; // -1 - no reference, 0..7 - reference buffer 0..7
int8_t ref_buf2 = -1; // -1 - no reference, 0..7 - reference buffer 0..7
int8_t ref_buf3 = -1; // -1 - no reference, 0..7 - reference buffer 0..7
};
struct SuperFrameRefSettings {
LayerFrameRefSettings layer[kMaxVp9NumberOfSpatialLayers];
uint8_t start_layer = 0; // The first spatial layer to be encoded.
uint8_t stop_layer = 0; // The last spatial layer to be encoded.
bool is_keyframe = false;
};
private:
// Determine number of encoder threads to use.
int NumberOfThreads(int width, int height, int number_of_cores);
@ -52,7 +72,23 @@ class VP9EncoderImpl : public VP9Encoder {
const vpx_codec_cx_pkt& pkt,
uint32_t timestamp);
int GetEncodedPartitions(const I420VideoFrame& input_image);
bool ExplicitlyConfiguredSpatialLayers() const;
bool SetSvcRates();
// Used for flexible mode to set the flags and buffer references used
// by the encoder. Also calculates the references used by the RTP
// packetizer.
//
// Has to be called for every frame (keyframes included) to update the
// state used to calculate references.
vpx_svc_ref_frame_config GenerateRefsAndFlags(
const SuperFrameRefSettings& settings);
virtual int GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt);
// Callback function for outputting packets per spatial layer.
static void EncoderOutputCodedPacketCallback(vpx_codec_cx_pkt* pkt,
void* user_data);
// Determine maximum target for Intra frames
//
@ -73,6 +109,22 @@ class VP9EncoderImpl : public VP9Encoder {
vpx_codec_ctx_t* encoder_;
vpx_codec_enc_cfg_t* config_;
vpx_image_t* raw_;
SvcInternal_t svc_internal_;
const VideoFrame* input_image_;
GofInfoVP9 gof_; // Contains each frame's temporal information for
// non-flexible mode.
uint8_t tl0_pic_idx_; // Only used in non-flexible mode.
size_t frames_since_kf_;
uint8_t num_temporal_layers_;
uint8_t num_spatial_layers_;
// Used for flexible mode.
bool is_flexible_mode_;
int64_t buffer_updated_at_frame_[kNumVp9Buffers];
int64_t frames_encoded_;
uint8_t num_ref_pics_[kMaxVp9NumberOfSpatialLayers];
uint8_t p_diff_[kMaxVp9NumberOfSpatialLayers][kMaxVp9RefPics];
rtc::scoped_ptr<ScreenshareLayersVP9> spatial_layer_;
};
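To make the flexible-mode members above concrete, here is a hypothetical example (not part of the patch) of how one super-frame's reference settings could be described before GenerateRefsAndFlags() turns them into a vpx_svc_ref_frame_config:

// Hypothetical illustration only: a two-spatial-layer key frame where each
// layer frame is stored in its own buffer. Assumes vp9_impl.h is included.
void FillKeyFrameSettingsExample(VP9EncoderImpl::SuperFrameRefSettings* s) {
  s->is_keyframe = true;
  s->start_layer = 0;       // first spatial layer to encode
  s->stop_layer = 1;        // last spatial layer to encode
  s->layer[0].upd_buf = 0;  // layer 0 frame saved to buffer 0
  s->layer[1].upd_buf = 1;  // layer 1 frame saved to buffer 1
  // A later delta frame would also set ref_buf1/2/3 to the buffers holding
  // the frames it predicts from; GenerateRefsAndFlags() translates that into
  // encoder flags and, via num_ref_pics_/p_diff_, into packetizer references.
}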
@ -99,7 +151,8 @@ class VP9DecoderImpl : public VP9Decoder {
private:
int ReturnFrame(const vpx_image_t* img, uint32_t timeStamp);
I420VideoFrame decoded_image_;
// Memory pool used to share buffers between libvpx and webrtc.
Vp9FrameBufferPool frame_buffer_pool_;
DecodedImageCallback* decode_complete_callback_;
bool inited_;
vpx_codec_ctx_t* decoder_;

View File

@ -57,7 +57,9 @@ VideoCodecVP9 VideoEncoder::GetDefaultVp9Settings() {
vp9_settings.frameDroppingOn = true;
vp9_settings.keyFrameInterval = 3000;
vp9_settings.adaptiveQpMode = true;
vp9_settings.automaticResizeOn = true;
vp9_settings.numberOfSpatialLayers = 1;
vp9_settings.flexibleMode = false;
return vp9_settings;
}
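A caller wanting more than the single-layer default would presumably start from these settings and override fields, for example (illustrative only):

// Illustrative override of the defaults above: two spatial layers encoded in
// flexible mode (field names as used in GetDefaultVp9Settings()).
VideoCodecVP9 vp9_settings = VideoEncoder::GetDefaultVp9Settings();
vp9_settings.numberOfSpatialLayers = 2;
vp9_settings.flexibleMode = true;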

View File

@ -141,27 +141,67 @@ void VCMEncodedFrame::CopyCodecSpecific(const RTPVideoHeader* header)
case kRtpVideoVp9: {
if (_codecSpecificInfo.codecType != kVideoCodecVP9) {
// This is the first packet for this frame.
_codecSpecificInfo.codecSpecific.VP9.pictureId = -1;
_codecSpecificInfo.codecSpecific.VP9.temporalIdx = 0;
_codecSpecificInfo.codecSpecific.VP9.layerSync = false;
_codecSpecificInfo.codecSpecific.VP9.keyIdx = -1;
_codecSpecificInfo.codecSpecific.VP9.picture_id = -1;
_codecSpecificInfo.codecSpecific.VP9.temporal_idx = 0;
_codecSpecificInfo.codecSpecific.VP9.spatial_idx = 0;
_codecSpecificInfo.codecSpecific.VP9.gof_idx = 0;
_codecSpecificInfo.codecSpecific.VP9.inter_layer_predicted = false;
_codecSpecificInfo.codecSpecific.VP9.tl0_pic_idx = -1;
_codecSpecificInfo.codecType = kVideoCodecVP9;
}
_codecSpecificInfo.codecSpecific.VP9.nonReference =
header->codecHeader.VP9.nonReference;
if (header->codecHeader.VP9.pictureId != kNoPictureId) {
_codecSpecificInfo.codecSpecific.VP9.pictureId =
header->codecHeader.VP9.pictureId;
_codecSpecificInfo.codecSpecific.VP9.inter_pic_predicted =
header->codecHeader.VP9.inter_pic_predicted;
_codecSpecificInfo.codecSpecific.VP9.flexible_mode =
header->codecHeader.VP9.flexible_mode;
_codecSpecificInfo.codecSpecific.VP9.num_ref_pics =
header->codecHeader.VP9.num_ref_pics;
for (uint8_t r = 0; r < header->codecHeader.VP9.num_ref_pics; ++r) {
_codecSpecificInfo.codecSpecific.VP9.p_diff[r] =
header->codecHeader.VP9.pid_diff[r];
}
if (header->codecHeader.VP9.temporalIdx != kNoTemporalIdx) {
_codecSpecificInfo.codecSpecific.VP9.temporalIdx =
header->codecHeader.VP9.temporalIdx;
_codecSpecificInfo.codecSpecific.VP9.layerSync =
header->codecHeader.VP9.layerSync;
_codecSpecificInfo.codecSpecific.VP9.ss_data_available =
header->codecHeader.VP9.ss_data_available;
if (header->codecHeader.VP9.picture_id != kNoPictureId) {
_codecSpecificInfo.codecSpecific.VP9.picture_id =
header->codecHeader.VP9.picture_id;
}
if (header->codecHeader.VP9.keyIdx != kNoKeyIdx) {
_codecSpecificInfo.codecSpecific.VP9.keyIdx =
header->codecHeader.VP9.keyIdx;
if (header->codecHeader.VP9.tl0_pic_idx != kNoTl0PicIdx) {
_codecSpecificInfo.codecSpecific.VP9.tl0_pic_idx =
header->codecHeader.VP9.tl0_pic_idx;
}
if (header->codecHeader.VP9.temporal_idx != kNoTemporalIdx) {
_codecSpecificInfo.codecSpecific.VP9.temporal_idx =
header->codecHeader.VP9.temporal_idx;
_codecSpecificInfo.codecSpecific.VP9.temporal_up_switch =
header->codecHeader.VP9.temporal_up_switch;
}
if (header->codecHeader.VP9.spatial_idx != kNoSpatialIdx) {
_codecSpecificInfo.codecSpecific.VP9.spatial_idx =
header->codecHeader.VP9.spatial_idx;
_codecSpecificInfo.codecSpecific.VP9.inter_layer_predicted =
header->codecHeader.VP9.inter_layer_predicted;
}
if (header->codecHeader.VP9.gof_idx != kNoGofIdx) {
_codecSpecificInfo.codecSpecific.VP9.gof_idx =
header->codecHeader.VP9.gof_idx;
}
if (header->codecHeader.VP9.ss_data_available) {
_codecSpecificInfo.codecSpecific.VP9.num_spatial_layers =
header->codecHeader.VP9.num_spatial_layers;
_codecSpecificInfo.codecSpecific.VP9
.spatial_layer_resolution_present =
header->codecHeader.VP9.spatial_layer_resolution_present;
if (header->codecHeader.VP9.spatial_layer_resolution_present) {
for (size_t i = 0; i < header->codecHeader.VP9.num_spatial_layers;
++i) {
_codecSpecificInfo.codecSpecific.VP9.width[i] =
header->codecHeader.VP9.width[i];
_codecSpecificInfo.codecSpecific.VP9.height[i] =
header->codecHeader.VP9.height[i];
}
}
_codecSpecificInfo.codecSpecific.VP9.gof.CopyGofInfoVP9(
header->codecHeader.VP9.gof);
}
break;
}

View File

@ -61,6 +61,8 @@ class VCMFrameBuffer : public VCMEncodedFrame {
int Tl0PicId() const;
bool NonReference() const;
void SetGofInfo(const GofInfoVP9& gof_info, size_t idx);
// Increments a counter to keep track of the number of packets of this frame
// which were NACKed before they arrived.
void IncrementNackCount();

View File

@ -36,21 +36,49 @@ void CopyCodecSpecific(const CodecSpecificInfo* info, RTPVideoHeader* rtp) {
rtp->simulcastIdx = info->codecSpecific.VP8.simulcastIdx;
return;
}
case kVideoCodecVP9: {
rtp->codec = kRtpVideoVp9;
rtp->codecHeader.VP9.InitRTPVideoHeaderVP9();
rtp->codecHeader.VP9.inter_pic_predicted =
info->codecSpecific.VP9.inter_pic_predicted;
rtp->codecHeader.VP9.flexible_mode =
info->codecSpecific.VP9.flexible_mode;
rtp->codecHeader.VP9.ss_data_available =
info->codecSpecific.VP9.ss_data_available;
rtp->codecHeader.VP9.picture_id = info->codecSpecific.VP9.picture_id;
rtp->codecHeader.VP9.tl0_pic_idx = info->codecSpecific.VP9.tl0_pic_idx;
rtp->codecHeader.VP9.temporal_idx = info->codecSpecific.VP9.temporal_idx;
rtp->codecHeader.VP9.spatial_idx = info->codecSpecific.VP9.spatial_idx;
rtp->codecHeader.VP9.temporal_up_switch =
info->codecSpecific.VP9.temporal_up_switch;
rtp->codecHeader.VP9.inter_layer_predicted =
info->codecSpecific.VP9.inter_layer_predicted;
rtp->codecHeader.VP9.gof_idx = info->codecSpecific.VP9.gof_idx;
rtp->codecHeader.VP9.num_spatial_layers =
info->codecSpecific.VP9.num_spatial_layers;
if (info->codecSpecific.VP9.ss_data_available) {
rtp->codecHeader.VP9.spatial_layer_resolution_present =
info->codecSpecific.VP9.spatial_layer_resolution_present;
if (info->codecSpecific.VP9.spatial_layer_resolution_present) {
for (size_t i = 0; i < info->codecSpecific.VP9.num_spatial_layers;
++i) {
rtp->codecHeader.VP9.width[i] = info->codecSpecific.VP9.width[i];
rtp->codecHeader.VP9.height[i] = info->codecSpecific.VP9.height[i];
}
}
rtp->codecHeader.VP9.gof.CopyGofInfoVP9(info->codecSpecific.VP9.gof);
}
rtp->codecHeader.VP9.num_ref_pics = info->codecSpecific.VP9.num_ref_pics;
for (int i = 0; i < info->codecSpecific.VP9.num_ref_pics; ++i)
rtp->codecHeader.VP9.pid_diff[i] = info->codecSpecific.VP9.p_diff[i];
return;
}
case kVideoCodecH264:
rtp->codec = kRtpVideoH264;
rtp->simulcastIdx = info->codecSpecific.H264.simulcastIdx;
return;
case kVideoCodecVP9:
rtp->codec = kRtpVideoVp9;
rtp->codecHeader.VP9.InitRTPVideoHeaderVP9();
rtp->codecHeader.VP9.pictureId = info->codecSpecific.VP9.pictureId;
rtp->codecHeader.VP9.nonReference =
info->codecSpecific.VP9.nonReference;
rtp->codecHeader.VP9.temporalIdx = info->codecSpecific.VP9.temporalIdx;
rtp->codecHeader.VP9.layerSync = info->codecSpecific.VP9.layerSync;
rtp->codecHeader.VP9.tl0PicIdx = info->codecSpecific.VP9.tl0PicIdx;
rtp->codecHeader.VP9.keyIdx = info->codecSpecific.VP9.keyIdx;
return;
case kVideoCodecGeneric:
rtp->codec = kRtpVideoGeneric;
rtp->simulcastIdx = info->codecSpecific.generic.simulcast_idx;

View File

@ -14,6 +14,9 @@
#include <algorithm>
#include <utility>
#include "webrtc/base/checks.h"
#include "webrtc/base/trace_event.h"
#include "webrtc/modules/rtp_rtcp/include/rtp_rtcp_defines.h"
#include "webrtc/modules/video_coding/main/interface/video_coding.h"
#include "webrtc/modules/video_coding/main/source/frame_buffer.h"
#include "webrtc/modules/video_coding/main/source/inter_frame_delay.h"
@ -26,10 +29,12 @@
#include "webrtc/system_wrappers/interface/event_wrapper.h"
#include "webrtc/system_wrappers/interface/logging.h"
#include "webrtc/system_wrappers/interface/metrics.h"
#include "webrtc/system_wrappers/interface/trace_event.h"
namespace webrtc {
// Interval for cleaning up old SS data.
static const uint32_t kSsCleanupIntervalSec = 60;
// Use this rtt if no value has been reported.
static const int64_t kDefaultRtt = 200;
@ -146,6 +151,98 @@ void FrameList::Reset(UnorderedFrameList* free_frames) {
}
}
bool Vp9SsMap::Insert(const VCMPacket& packet) {
if (!packet.codecSpecificHeader.codecHeader.VP9.ss_data_available)
return false;
ss_map_[packet.timestamp] = packet.codecSpecificHeader.codecHeader.VP9.gof;
return true;
}
void Vp9SsMap::Reset() {
ss_map_.clear();
}
bool Vp9SsMap::Find(uint32_t timestamp, SsMap::iterator* it_out) {
bool found = false;
for (SsMap::iterator it = ss_map_.begin(); it != ss_map_.end(); ++it) {
if (it->first == timestamp || IsNewerTimestamp(timestamp, it->first)) {
*it_out = it;
found = true;
}
}
return found;
}
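For illustration: if the map holds SS entries at RTP timestamps 1000, 4000 and 7000, then Find(5000, &it) returns true with |it| pointing at the 4000 entry. The loop keeps overwriting |it_out| for every entry that is not newer than the queried timestamp, so the result is the most recent SS received at or before that timestamp.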
void Vp9SsMap::RemoveOld(uint32_t timestamp) {
if (!TimeForCleanup(timestamp))
return;
SsMap::iterator it;
if (!Find(timestamp, &it))
return;
ss_map_.erase(ss_map_.begin(), it);
AdvanceFront(timestamp);
}
bool Vp9SsMap::TimeForCleanup(uint32_t timestamp) const {
if (ss_map_.empty() || !IsNewerTimestamp(timestamp, ss_map_.begin()->first))
return false;
uint32_t diff = timestamp - ss_map_.begin()->first;
return diff / kVideoPayloadTypeFrequency >= kSsCleanupIntervalSec;
}
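With the 90 kHz RTP video clock (kVideoPayloadTypeFrequency = 90000), kSsCleanupIntervalSec = 60 means cleanup is considered only once the timestamp has advanced by at least 90000 * 60 = 5,400,000 ticks past the oldest stored entry.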
void Vp9SsMap::AdvanceFront(uint32_t timestamp) {
RTC_DCHECK(!ss_map_.empty());
GofInfoVP9 gof = ss_map_.begin()->second;
ss_map_.erase(ss_map_.begin());
ss_map_[timestamp] = gof;
}
// TODO(asapersson): Update according to updates in RTP payload profile.
bool Vp9SsMap::UpdatePacket(VCMPacket* packet) {
uint8_t gof_idx = packet->codecSpecificHeader.codecHeader.VP9.gof_idx;
if (gof_idx == kNoGofIdx)
return false; // No update needed.
SsMap::iterator it;
if (!Find(packet->timestamp, &it))
return false; // Corresponding SS not yet received.
if (gof_idx >= it->second.num_frames_in_gof)
return false; // Assume corresponding SS not yet received.
RTPVideoHeaderVP9* vp9 = &packet->codecSpecificHeader.codecHeader.VP9;
vp9->temporal_idx = it->second.temporal_idx[gof_idx];
vp9->temporal_up_switch = it->second.temporal_up_switch[gof_idx];
// TODO(asapersson): Set vp9.ref_picture_id[i] and add usage.
vp9->num_ref_pics = it->second.num_ref_pics[gof_idx];
for (uint8_t i = 0; i < it->second.num_ref_pics[gof_idx]; ++i) {
vp9->pid_diff[i] = it->second.pid_diff[gof_idx][i];
}
return true;
}
void Vp9SsMap::UpdateFrames(FrameList* frames) {
for (const auto& frame_it : *frames) {
uint8_t gof_idx =
frame_it.second->CodecSpecific()->codecSpecific.VP9.gof_idx;
if (gof_idx == kNoGofIdx) {
continue;
}
SsMap::iterator ss_it;
if (Find(frame_it.second->TimeStamp(), &ss_it)) {
if (gof_idx >= ss_it->second.num_frames_in_gof) {
continue; // Assume corresponding SS not yet received.
}
frame_it.second->SetGofInfo(ss_it->second, gof_idx);
}
}
}
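The expected call pattern from the jitter buffer's packet path (the actual call sites in VCMJitterBuffer::InsertPacket are not part of this hunk, so this is a sketch of the assumed usage):

// Assumed usage sketch, not the actual InsertPacket code.
void OnVp9PacketSketch(Vp9SsMap* ss_map, VCMPacket* packet,
                       FrameList* incomplete_frames) {
  if (ss_map->Insert(*packet)) {
    // A new scalability structure arrived; frames queued before it can now
    // pick up their temporal/reference info.
    ss_map->UpdateFrames(incomplete_frames);
  }
  // Fill in temporal_idx, temporal_up_switch and pid_diff from the GOF that
  // covers this packet (no-op until the corresponding SS has been received).
  ss_map->UpdatePacket(packet);
  // Drop SS entries older than kSsCleanupIntervalSec.
  ss_map->RemoveOld(packet->timestamp);
}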
VCMJitterBuffer::VCMJitterBuffer(Clock* clock, EventFactory* event_factory)
: clock_(clock),
running_(false),
@ -204,7 +301,7 @@ VCMJitterBuffer::~VCMJitterBuffer() {
}
void VCMJitterBuffer::UpdateHistograms() {
if (num_packets_ <= 0) {
if (num_packets_ <= 0 || !running_) {
return;
}
int64_t elapsed_sec =
@ -624,6 +721,9 @@ VCMFrameBufferEnum VCMJitterBuffer::InsertPacket(const VCMPacket& packet,
last_decoded_state_.UpdateOldPacket(&packet);
DropPacketsFromNackList(last_decoded_state_.sequence_num());
// Also see if this old packet made more incomplete frames continuous.
FindAndInsertContinuousFramesWithState(last_decoded_state_);
if (num_consecutive_old_packets_ > kMaxConsecutiveOldPackets) {
LOG(LS_WARNING)
<< num_consecutive_old_packets_
@ -800,6 +900,16 @@ void VCMJitterBuffer::FindAndInsertContinuousFrames(
VCMDecodingState decoding_state;
decoding_state.CopyFrom(last_decoded_state_);
decoding_state.SetState(&new_frame);
FindAndInsertContinuousFramesWithState(decoding_state);
}
void VCMJitterBuffer::FindAndInsertContinuousFramesWithState(
const VCMDecodingState& original_decoded_state) {
// Copy original_decoded_state so we can move the state forward with each
// decodable frame we find.
VCMDecodingState decoding_state;
decoding_state.CopyFrom(original_decoded_state);
// When temporal layers are available, we search for a complete or decodable
// frame until we hit one of the following:
// 1. Continuous base or sync layer.
@ -807,7 +917,8 @@ void VCMJitterBuffer::FindAndInsertContinuousFrames(
for (FrameList::iterator it = incomplete_frames_.begin();
it != incomplete_frames_.end();) {
VCMFrameBuffer* frame = it->second;
if (IsNewerTimestamp(new_frame.TimeStamp(), frame->TimeStamp())) {
if (IsNewerTimestamp(original_decoded_state.time_stamp(),
frame->TimeStamp())) {
++it;
continue;
}
@ -858,7 +969,7 @@ void VCMJitterBuffer::SetNackMode(VCMNackMode mode,
low_rtt_nack_threshold_ms_ = low_rtt_nack_threshold_ms;
high_rtt_nack_threshold_ms_ = high_rtt_nack_threshold_ms;
// Don't set a high start rtt if high_rtt_nack_threshold_ms_ is used, to not
// disable NACK in hybrid mode.
// disable NACK in |kNack| mode.
if (rtt_ms_ == kDefaultRtt && high_rtt_nack_threshold_ms_ != -1) {
rtt_ms_ = 0;
}

View File

@ -89,6 +89,7 @@ static void LogCodec(const VideoCodec& codec) {
<< codec.codecSpecific.H264.ppsLen;
} else if (codec.codecType == kVideoCodecVP9) {
LOG(LS_INFO) << "VP9 specific settings";
// XXX FIX!! log VP9 specific settings
}
}