Bug 674225 - Add Opus support to nsOggReader. r=cpearce

Parse and decode Opus streams embedded in the Ogg
container. Based on the draft specification from
https://wiki.xiph.org/OggOpus
Support is conditional on the runtime preference
setting media.opus.enabled, which is false by
default until we're confident the spec is stable
and useful.

This patch doesn't support the gain header or
multichannel files.

The LEUint*() functions from the skeleton parser
are used to read the multi-byte header fields.
This requires moving them to earlier in the file.

Mappings for the .opus filename extension are also
added to facilitate testing with local files.
This commit is contained in:
Ralph Giles 2012-05-01 17:29:34 -07:00
parent 1596d5df6d
commit 64ef2c566d
8 changed files with 393 additions and 43 deletions

View File

@ -300,6 +300,8 @@ public:
static bool IsOggType(const nsACString& aType);
static const char gOggTypes[3][16];
static char const *const gOggCodecs[3];
static bool IsOpusEnabled();
static char const *const gOggCodecsWithOpus[4];
#endif
#ifdef MOZ_WAVE

View File

@ -1963,6 +1963,23 @@ char const *const nsHTMLMediaElement::gOggCodecs[3] = {
nsnull
};
// Codec names for Ogg media types when Opus is enabled: "vorbis", "opus"
// and "theora", followed by a terminating null entry (4 slots in total).
// CanHandleMediaType() hands out this list instead of gOggCodecs when
// IsOpusEnabled() returns true.
char const *const nsHTMLMediaElement::gOggCodecsWithOpus[4] = {
"vorbis",
"opus",
"theora",
nsnull
};
// Reports whether Opus playback is switched on.
// Builds compiled without MOZ_OPUS always answer false; otherwise the
// answer is whatever the "media.opus.enabled" preference currently holds.
bool
nsHTMLMediaElement::IsOpusEnabled()
{
  bool enabled = false;
#ifdef MOZ_OPUS
  enabled = Preferences::GetBool("media.opus.enabled");
#endif
  return enabled;
}
bool
nsHTMLMediaElement::IsOggEnabled()
{
@ -2113,7 +2130,7 @@ nsHTMLMediaElement::CanHandleMediaType(const char* aMIMEType,
#endif
#ifdef MOZ_OGG
if (IsOggType(nsDependentCString(aMIMEType))) {
*aCodecList = gOggCodecs;
*aCodecList = IsOpusEnabled() ? gOggCodecsWithOpus : gOggCodecs;
return CANPLAY_MAYBE;
}
#endif

View File

@ -53,6 +53,30 @@ extern PRLogModuleInfo* gBuiltinDecoderLog;
#define LOG(type, msg)
#endif
// Reads a little-endian encoded unsigned 32bit integer at p.
// p must point at (at least) four readable bytes; p[0] is the least
// significant byte and p[3] the most significant.
static PRUint32 LEUint32(const unsigned char* p)
{
  // Widen every byte to PRUint32 before shifting. An unsigned char operand
  // is promoted to (signed) int, so shifting a top byte >= 0x80 left by 24
  // would shift into the sign bit of int.
  return static_cast<PRUint32>(p[0]) |
         (static_cast<PRUint32>(p[1]) << 8) |
         (static_cast<PRUint32>(p[2]) << 16) |
         (static_cast<PRUint32>(p[3]) << 24);
}
// Reads a little-endian encoded 64bit integer at p.
// p must point at (at least) eight readable bytes: the low 32 bits come
// first, followed by the high 32 bits.
static PRInt64 LEInt64(const unsigned char* p)
{
  PRUint32 lo = LEUint32(p);
  PRUint32 hi = LEUint32(p + 4);
  // Assemble the value in an unsigned type: shifting a signed 64-bit value
  // left by 32 overflows into the sign bit whenever the top bit of hi is set.
  // The final conversion to PRInt64 reinterprets the assembled bit pattern.
  const unsigned long long bits =
    static_cast<unsigned long long>(lo) |
    (static_cast<unsigned long long>(hi) << 32);
  return static_cast<PRInt64>(bits);
}
// Reads a little-endian encoded unsigned 16bit integer at p.
// p[0] supplies the low byte and p[1] the high byte.
static PRUint16 LEUint16(const unsigned char* p)
{
  const unsigned char lowByte = p[0];
  const unsigned char highByte = p[1];
  return lowByte + (highByte << 8);
}
/** Decoder base class for Ogg-encapsulated streams. */
nsOggCodecState*
nsOggCodecState::Create(ogg_page* aPage)
{
@ -62,6 +86,10 @@ nsOggCodecState::Create(ogg_page* aPage)
codecState = new nsTheoraState(aPage);
} else if (aPage->body_len > 6 && memcmp(aPage->body+1, "vorbis", 6) == 0) {
codecState = new nsVorbisState(aPage);
#ifdef MOZ_OPUS
} else if (aPage->body_len > 8 && memcmp(aPage->body, "OpusHead", 8) == 0) {
codecState = new nsOpusState(aPage);
#endif
} else if (aPage->body_len > 8 && memcmp(aPage->body, "fishead\0", 8) == 0) {
codecState = new nsSkeletonState(aPage);
} else {
@ -750,12 +778,178 @@ nsresult nsVorbisState::ReconstructVorbisGranulepos()
return NS_OK;
}
#ifdef MOZ_OPUS
// Constructs the codec state for an Opus stream.
// aBosPage and the literal `true` are forwarded unchanged to the
// nsOggCodecState constructor. Every header-derived field starts out as
// zero and no decoder exists yet (mDecoder is NULL); the header fields are
// filled in by DecodeHeader() and the decoder is created by Init().
nsOpusState::nsOpusState(ogg_page* aBosPage) :
nsOggCodecState(aBosPage, true),
mRate(0),
mNominalRate(0),
mChannels(0),
mPreSkip(0),
mGain(0.0),
mChannelMapping(0),
mStreams(0),
mDecoder(NULL)
{
// Instance accounting; paired with MOZ_COUNT_DTOR in the destructor.
MOZ_COUNT_CTOR(nsOpusState);
}
nsSkeletonState::nsSkeletonState(ogg_page* aBosPage)
: nsOggCodecState(aBosPage, true),
mVersion(0),
mPresentationTime(0),
mLength(0)
// Tears the stream state down: resets it first, then releases the Opus
// decoder if one was ever created.
nsOpusState::~nsOpusState()
{
  MOZ_COUNT_DTOR(nsOpusState);
  Reset();
  if (!mDecoder) {
    // Init() never produced a decoder, so there is nothing to free.
    return;
  }
  opus_decoder_destroy(mDecoder);
  mDecoder = NULL;
}
// Resets the Opus decoder (when the stream is active and a decoder exists)
// and then lets the base class clear its queued data.
// Returns NS_ERROR_FAILURE if the base-class reset fails, NS_OK otherwise.
nsresult nsOpusState::Reset()
{
  const bool haveLiveDecoder = mActive && mDecoder;
  if (haveLiveDecoder) {
    // Reset the decoder.
    opus_decoder_ctl(mDecoder, OPUS_RESET_STATE);
  }

  // Clear queued data.
  return NS_FAILED(nsOggCodecState::Reset()) ? NS_ERROR_FAILURE : NS_OK;
}
// Creates the Opus decoder from the rate and channel count gathered by
// DecodeHeader(). Returns true only when libopus reports OPUS_OK.
bool nsOpusState::Init(void)
{
  // A second call would overwrite (and leak) the existing decoder.
  NS_ASSERTION(mDecoder == NULL, "leaking OpusDecoder");

  int status;
  mDecoder = opus_decoder_create(mRate, mChannels, &status);
  return OPUS_OK == status;
}
// Parses one Opus header packet.
//
// Two packets are recognised:
//  - "OpusTags" (metadata): marks header reading as finished and the
//    stream as active.
//  - "OpusHead" followed by a zero byte (identification): fills in the
//    channel count, pre-skip, nominal rate, gain, channel mapping family
//    and stream count.
// Anything else, or a packet that is too short, deactivates the stream
// (mActive = false). Every path here returns true, including the ones
// that deactivate the stream.
bool nsOpusState::DecodeHeader(ogg_packet* aPacket)
{
  // Minimum length of any header is 16 bytes.
  if (aPacket->bytes < 16) {
    LOG(PR_LOG_DEBUG, ("Invalid Opus file: header too short"));
    mActive = false;
    return true;
  }

  // Try parsing as the metadata header.
  if (!memcmp(aPacket->packet, "OpusTags", 8)) {
    mDoneReadingHeaders = true; // This is the last Opus header.
    mActive = true;
    return true;
  }

  // Otherwise, parse as the id header. The fixed part of the id header is
  // 19 bytes long; the ninth compared byte is the zero that follows the
  // "OpusHead" magic.
  if (aPacket->bytes < 19 || memcmp(aPacket->packet, "OpusHead\0", 9)) {
    LOG(PR_LOG_DEBUG, ("Invalid Opus file: unrecognized header"));
    mActive = false;
    return true;
  }

  mRate = 48000; // The Opus decoder runs at 48 kHz regardless.
  mChannels= aPacket->packet[9];
  mPreSkip = LEUint16(aPacket->packet + 10);
  mNominalRate = LEUint32(aPacket->packet + 12);

  // The output gain is a *signed* 16-bit Q7.8 value (dB). Sign-extend the
  // raw field before scaling, otherwise negative gains would be read as
  // large positive ones.
  int rawGain = LEUint16(aPacket->packet + 16);
  if (rawGain >= 0x8000) {
    rawGain -= 0x10000;
  }
  mGain = static_cast<float>(rawGain) / 256.0;

  mChannelMapping = aPacket->packet[18];

  if (mChannelMapping == 0) {
    // Mapping family 0 carries no mapping table: always a single stream.
    mStreams = 1;
  } else if (aPacket->bytes > 19) {
    // First byte of the channel mapping table is the stream count.
    mStreams = aPacket->packet[19];
  } else {
    LOG(PR_LOG_DEBUG, ("Invalid Opus file: channel mapping %d,"
                       " but no channel mapping table", mChannelMapping));
    mActive = false;
    return true;
  }

  return true;
}
/* Return the timestamp (in microseconds) equivalent to a granulepos.
 *
 * The pre-skip is subtracted first, so a granulepos that lies inside the
 * pre-skip region yields a negative time. Returns -1 when granulepos is
 * negative, when the multiplication overflows, or when no sample rate is
 * known yet (mRate is only set once the id header has been parsed). */
PRInt64 nsOpusState::Time(PRInt64 granulepos)
{
  // mRate stays 0 until DecodeHeader() has seen an OpusHead packet; bail
  // out rather than divide by zero below.
  if (granulepos < 0 || mRate <= 0)
    return -1;

  // Ogg Opus always runs at a granule rate of 48 kHz.
  CheckedInt64 t = CheckedInt64(granulepos - mPreSkip) * USECS_PER_S;
  return t.valid() ? t.value() / mRate : -1;
}
// Tells whether aPacket looks like an Opus header: it must be at least
// 16 bytes long and begin with either "OpusHead" plus a zero byte, or
// "OpusTags".
bool nsOpusState::IsHeader(ogg_packet* aPacket)
{
  if (aPacket->bytes < 16) {
    return false;
  }
  if (memcmp(aPacket->packet, "OpusHead\0", 9) == 0) {
    return true;
  }
  return memcmp(aPacket->packet, "OpusTags", 8) == 0;
}
// Feeds one Ogg page into this stream.
// Inactive streams ignore the page. Otherwise the page is submitted to the
// ogg stream state and packets are pulled out until one with a granulepos
// is seen; once headers are done, the pending packets get their
// granulepos reconstructed and are moved to the packet queue.
// Returns NS_ERROR_FAILURE if libogg rejects the page, the result of
// PacketOutUntilGranulepos() when stamping cannot happen yet, else NS_OK.
nsresult nsOpusState::PageIn(ogg_page* aPage)
{
  if (!mActive) {
    return NS_OK;
  }
  NS_ASSERTION(static_cast<PRUint32>(ogg_page_serialno(aPage)) == mSerial,
               "Page must be for this stream!");
  if (ogg_stream_pagein(&mState, aPage) == -1) {
    return NS_ERROR_FAILURE;
  }

  bool haveGranulepos;
  nsresult rv = PacketOutUntilGranulepos(haveGranulepos);
  const bool readyToStamp =
    NS_SUCCEEDED(rv) && haveGranulepos && mDoneReadingHeaders;
  if (!readyToStamp) {
    return rv;
  }

  ReconstructGranulepos();

  // Hand every now-stamped packet over to the playable queue.
  const PRUint32 pending = mUnstamped.Length();
  for (PRUint32 idx = 0; idx < pending; ++idx) {
    ogg_packet* stamped = mUnstamped[idx];
    NS_ASSERTION(!IsHeader(stamped), "Don't try to play a header packet");
    NS_ASSERTION(stamped->granulepos != -1, "Packet should have a granulepos");
    mPackets.Append(stamped);
  }
  mUnstamped.Clear();
  return NS_OK;
}
void nsOpusState::ReconstructGranulepos(void)
{
NS_ASSERTION(mUnstamped.Length() > 0, "Must have unstamped packets");
ogg_packet* last = mUnstamped[mUnstamped.Length()-1];
NS_ASSERTION(last->e_o_s || last->granulepos > 0,
"Must know last granulepos!");
// Loop through the packets backwards, subtracting the next
// packet's duration from its granulepos to get the value
// for the current packet.
for (PRUint32 i = mUnstamped.Length() - 1; i > 0; i--) {
ogg_packet* next = mUnstamped[i];
int offset = opus_decoder_get_nb_samples(mDecoder,
next->packet,
next->bytes);
// Check for error (negative offset) and overflow.
if (offset >= 0 && offset <= next->granulepos) {
mUnstamped[i - 1]->granulepos = next->granulepos - offset;
} else {
if (offset > next->granulepos)
NS_WARNING("Clamping negative Opus granulepos to zero.");
mUnstamped[i - 1]->granulepos = 0;
}
}
}
#endif /* MOZ_OPUS */
// Constructs the codec state for a Skeleton stream.
// aBosPage and the literal `true` are forwarded unchanged to the
// nsOggCodecState constructor; the version, presentation time and length
// all start out as zero.
nsSkeletonState::nsSkeletonState(ogg_page* aBosPage) :
nsOggCodecState(aBosPage, true),
mVersion(0),
mPresentationTime(0),
mLength(0)
{
// Instance accounting for this class.
MOZ_COUNT_CTOR(nsSkeletonState);
}
@ -810,29 +1004,6 @@ static bool IsSkeletonIndex(ogg_packet* aPacket)
memcmp(reinterpret_cast<char*>(aPacket->packet), "index", 5) == 0;
}
// Reads a little-endian encoded unsigned 32bit integer at p.
// p must point at (at least) four readable bytes; p[0] is the least
// significant byte and p[3] the most significant.
static PRUint32 LEUint32(const unsigned char* p)
{
  // Widen every byte to PRUint32 before shifting. An unsigned char operand
  // is promoted to (signed) int, so shifting a top byte >= 0x80 left by 24
  // would shift into the sign bit of int.
  return static_cast<PRUint32>(p[0]) |
         (static_cast<PRUint32>(p[1]) << 8) |
         (static_cast<PRUint32>(p[2]) << 16) |
         (static_cast<PRUint32>(p[3]) << 24);
}
// Reads a little-endian encoded 64bit integer at p.
// p must point at (at least) eight readable bytes: the low 32 bits come
// first, followed by the high 32 bits.
static PRInt64 LEInt64(const unsigned char* p)
{
  PRUint32 lo = LEUint32(p);
  PRUint32 hi = LEUint32(p + 4);
  // Assemble the value in an unsigned type: shifting a signed 64-bit value
  // left by 32 overflows into the sign bit whenever the top bit of hi is set.
  // The final conversion to PRInt64 reinterprets the assembled bit pattern.
  const unsigned long long bits =
    static_cast<unsigned long long>(lo) |
    (static_cast<unsigned long long>(hi) << 32);
  return static_cast<PRInt64>(bits);
}
// Reads a little-endian encoded unsigned 16bit integer at p.
// p[0] supplies the low byte and p[1] the high byte.
static PRUint16 LEUint16(const unsigned char* p)
{
  const unsigned char lowByte = p[0];
  const unsigned char highByte = p[1];
  return lowByte + (highByte << 8);
}
// Reads a variable length encoded integer at p. Will not read
// past aLimit. Returns pointer to character after end of integer.
static const unsigned char* ReadVariableLengthInt(const unsigned char* p,

View File

@ -46,6 +46,9 @@
#else
#include <vorbis/codec.h>
#endif
#ifdef MOZ_OPUS
#include <opus/opus.h>
#endif
#include <nsDeque.h>
#include <nsTArray.h>
#include <nsClassHashtable.h>
@ -101,8 +104,9 @@ public:
enum CodecType {
TYPE_VORBIS=0,
TYPE_THEORA=1,
TYPE_SKELETON=2,
TYPE_UNKNOWN=3
TYPE_OPUS=2,
TYPE_SKELETON=3,
TYPE_UNKNOWN=4
};
virtual ~nsOggCodecState();
@ -319,6 +323,42 @@ private:
};
// Codec state for an Opus stream carried in an Ogg container.
// The class is always declared, but its entire body is compiled only when
// MOZ_OPUS is defined; without it the class adds nothing to
// nsOggCodecState.
class nsOpusState : public nsOggCodecState {
#ifdef MOZ_OPUS
public:
nsOpusState(ogg_page* aBosPage);
virtual ~nsOpusState();
// Identifies this state as an Opus stream.
CodecType GetType() { return TYPE_OPUS; }
// Parses an "OpusHead" or "OpusTags" header packet and fills in the
// header fields below.
bool DecodeHeader(ogg_packet* aPacket);
// Converts a granulepos to a timestamp in microseconds.
PRInt64 Time(PRInt64 granulepos);
// Creates mDecoder from mRate and mChannels; true on success.
bool Init();
// Resets the decoder state and clears queued data.
nsresult Reset();
// True if aPacket starts with one of the Opus header signatures.
bool IsHeader(ogg_packet* aPacket);
// Submits a page to the stream and queues the packets it completes.
nsresult PageIn(ogg_page* aPage);
// Various fields from the Ogg Opus header.
int mRate; // Sample rate the decoder uses (always 48 kHz).
int mNominalRate; // Original sample rate of the data (informational).
int mChannels; // Number of channels the stream encodes.
int mPreSkip; // Number of samples to strip after decoder reset.
float mGain; // Gain (dB) to apply to decoder output.
int mChannelMapping; // Channel mapping family.
int mStreams; // Number of packed streams in each packet.
// libopus decoder instance; NULL until Init() has created it.
OpusDecoder *mDecoder;
private:
// Reconstructs the granulepos of Opus packets stored in the
// mUnstamped array. mUnstamped must be filled with consecutive packets from
// the stream, with the last packet having a known granulepos. Using this
// known granulepos, and the known frame numbers, we recover the granulepos
// of all frames in the array. This enables us to determine their timestamps.
void ReconstructGranulepos();
#endif /* MOZ_OPUS */
};
// Constructs a 32bit version number out of two 16 bit major,minor
// version numbers.
#define SKELETON_VERSION(major, minor) (((major)<<16)|(minor))

View File

@ -42,6 +42,9 @@
#include "nsOggReader.h"
#include "VideoUtils.h"
#include "theora/theoradec.h"
#ifdef MOZ_OPUS
#include "opus/opus.h"
#endif
#include "nsTimeRanges.h"
#include "mozilla/TimeStamp.h"
@ -105,6 +108,8 @@ nsOggReader::nsOggReader(nsBuiltinDecoder* aDecoder)
: nsBuiltinDecoderReader(aDecoder),
mTheoraState(nsnull),
mVorbisState(nsnull),
mOpusState(nsnull),
mOpusEnabled(nsHTMLMediaElement::IsOpusEnabled()),
mSkeletonState(nsnull),
mVorbisSerial(0),
mTheoraSerial(0),
@ -145,6 +150,9 @@ nsresult nsOggReader::ResetDecode()
if (mVorbisState && NS_FAILED(mVorbisState->Reset())) {
res = NS_ERROR_FAILURE;
}
if (mOpusState && NS_FAILED(mOpusState->Reset())) {
res = NS_ERROR_FAILURE;
}
if (mTheoraState && NS_FAILED(mTheoraState->Reset())) {
res = NS_ERROR_FAILURE;
}
@ -217,6 +225,17 @@ nsresult nsOggReader::ReadMetadata(nsVideoInfo* aInfo)
// bitstreams will be ignored.
mTheoraState = static_cast<nsTheoraState*>(codecState);
}
if (codecState &&
codecState->GetType() == nsOggCodecState::TYPE_OPUS &&
!mOpusState)
{
if (mOpusEnabled) {
mOpusState = static_cast<nsOpusState*>(codecState);
} else {
NS_WARNING("Opus decoding disabled."
" See media.opus.enabled in about:config");
}
}
if (codecState &&
codecState->GetType() == nsOggCodecState::TYPE_SKELETON &&
!mSkeletonState)
@ -240,7 +259,8 @@ nsresult nsOggReader::ReadMetadata(nsVideoInfo* aInfo)
// Deactivate any non-primary bitstreams.
for (PRUint32 i = 0; i < bitstreams.Length(); i++) {
nsOggCodecState* s = bitstreams[i];
if (s != mVorbisState && s != mTheoraState && s != mSkeletonState) {
if (s != mVorbisState && s != mOpusState &&
s != mTheoraState && s != mSkeletonState) {
s->Deactivate();
}
}
@ -290,7 +310,13 @@ nsresult nsOggReader::ReadMetadata(nsVideoInfo* aInfo)
} else {
memset(&mVorbisInfo, 0, sizeof(mVorbisInfo));
}
#ifdef MOZ_OPUS
if (mOpusState && ReadHeaders(mOpusState)) {
mInfo.mHasAudio = true;
mInfo.mAudioRate = mOpusState->mRate;
mInfo.mAudioChannels = mOpusState->mChannels;
}
#endif
if (mSkeletonState) {
if (!HasAudio() && !HasVideo()) {
// We have a skeleton track, but no audio or video, may as well disable
@ -386,20 +412,84 @@ nsresult nsOggReader::DecodeVorbis(ogg_packet* aPacket) {
}
return NS_OK;
}
#ifdef MOZ_OPUS
// Decodes one packet of Opus data and pushes the resulting samples onto
// the audio queue.
//
// aPacket must carry a known granulepos (the position of its last sample).
// Samples that fall inside the stream's pre-skip region are discarded:
// a packet lying entirely inside it produces no output, and a packet that
// straddles its end is trimmed so only the samples after it are queued.
//
// Returns NS_ERROR_FAILURE if libopus cannot size or decode the packet,
// NS_OK otherwise (including when the whole packet is discarded).
nsresult nsOggReader::DecodeOpus(ogg_packet* aPacket) {
  NS_ASSERTION(aPacket->granulepos != -1, "Must know opus granulepos!");

  // Number of samples per channel the packet will decode to.
  PRInt32 frames = opus_decoder_get_nb_samples(mOpusState->mDecoder,
                                               aPacket->packet,
                                               aPacket->bytes);
  if (frames <= 0)
    return NS_ERROR_FAILURE;
  PRUint32 channels = mOpusState->mChannels;
  nsAutoArrayPtr<AudioDataValue> buffer(new AudioDataValue[frames * channels]);

  // Decode to the appropriate sample type.
#ifdef MOZ_SAMPLE_TYPE_FLOAT32
  int ret = opus_decode_float(mOpusState->mDecoder,
                              aPacket->packet, aPacket->bytes,
                              buffer, frames, false);
#else
  int ret = opus_decode(mOpusState->mDecoder,
                        aPacket->packet, aPacket->bytes,
                        buffer, frames, false);
#endif
  if (ret < 0)
    return NS_ERROR_FAILURE;
  NS_ASSERTION(ret == frames, "Opus decoded too few audio samples");

  PRInt64 endFrame = aPacket->granulepos;
  PRInt64 endTime = mOpusState->Time(endFrame);
  PRInt64 startTime = mOpusState->Time(endFrame - frames);
  PRInt64 duration = endTime - startTime;

  // Trim the initial samples.
  if (endTime < 0)
    return NS_OK;
  if (startTime < 0) {
    // The packet straddles the end of the pre-skip region. Only the samples
    // after the pre-skip are kept; their count follows from the packet's
    // end position. Subtracting the full pre-skip from this packet's length
    // is only right when the pre-skip starts inside this very packet, and
    // goes negative when earlier packets already consumed part of it.
    PRInt64 keep = endFrame - mOpusState->mPreSkip;
    if (keep <= 0)
      return NS_OK;  // Nothing beyond the pre-skip in this packet.
    if (keep > frames)
      keep = frames;
    PRInt32 goodFrames = static_cast<PRInt32>(keep);
    PRInt32 skip = frames - goodFrames;

    nsAutoArrayPtr<AudioDataValue> goodBuffer(new AudioDataValue[goodFrames * channels]);
    const PRUint32 goodSamples = goodFrames * channels;
    for (PRUint32 i = 0; i < goodSamples; i++)
      goodBuffer[i] = buffer[skip*channels + i];

    startTime = mOpusState->Time(endFrame - goodFrames);
    duration = endTime - startTime;
    frames = goodFrames;
    buffer = goodBuffer;
  }

  // The queue entry takes ownership of the sample buffer.
  mAudioQueue.Push(new AudioData(mPageOffset,
                                 startTime,
                                 duration,
                                 frames,
                                 buffer.forget(),
                                 channels));
  return NS_OK;
}
#endif /* MOZ_OPUS */
bool nsOggReader::DecodeAudioData()
{
NS_ASSERTION(mDecoder->OnDecodeThread(), "Should be on decode thread.");
NS_ASSERTION(mVorbisState!=0, "Need Vorbis state to decode audio");
NS_ASSERTION(mVorbisState != nsnull || mOpusState != nsnull,
"Need audio codec state to decode audio");
// Read the next data packet. Skip any non-data packets we encounter.
ogg_packet* packet = 0;
nsOggCodecState* codecState;
if (mVorbisState)
codecState = static_cast<nsOggCodecState*>(mVorbisState);
else
codecState = static_cast<nsOggCodecState*>(mOpusState);
do {
if (packet) {
nsOggCodecState::ReleasePacket(packet);
}
packet = NextOggPacket(mVorbisState);
} while (packet && mVorbisState->IsHeader(packet));
packet = NextOggPacket(codecState);
} while (packet && codecState->IsHeader(packet));
if (!packet) {
mAudioQueue.Finish();
return false;
@ -408,7 +498,14 @@ bool nsOggReader::DecodeAudioData()
NS_ASSERTION(packet && packet->granulepos != -1,
"Must have packet with known granulepos");
nsAutoReleasePacket autoRelease(packet);
DecodeVorbis(packet);
if (mVorbisState) {
DecodeVorbis(packet);
#ifdef MOZ_OPUS
} else if (mOpusState) {
DecodeOpus(packet);
#endif
}
if (packet->e_o_s) {
// We've encountered an end of bitstream packet, or we've hit the end of
// file while trying to decode, so inform the audio queue that there'll
@ -1295,11 +1392,14 @@ nsresult nsOggReader::SeekBisection(PRInt64 aTarget,
ogg_int64_t granulepos = ogg_page_granulepos(&page);
if (HasAudio() &&
granulepos > 0 &&
serial == mVorbisState->mSerial &&
audioTime == -1) {
audioTime = mVorbisState->Time(granulepos);
if (HasAudio() && granulepos > 0 && audioTime == -1) {
if (mVorbisState && serial == mVorbisState->mSerial) {
audioTime = mVorbisState->Time(granulepos);
#ifdef MOZ_OPUS
} else if (mOpusState && serial == mOpusState->mSerial) {
audioTime = mOpusState->Time(granulepos);
#endif
}
}
if (HasVideo() &&

View File

@ -72,7 +72,8 @@ public:
PRInt64 aTimeThreshold);
virtual bool HasAudio() {
return mVorbisState != 0 && mVorbisState->mActive;
return (mVorbisState != 0 && mVorbisState->mActive) ||
(mOpusState != 0 && mOpusState->mActive);
}
virtual bool HasVideo() {
@ -216,6 +217,10 @@ private:
// audio queue.
nsresult DecodeVorbis(ogg_packet* aPacket);
// Decodes a packet of Opus data, and inserts its samples into the
// audio queue.
nsresult DecodeOpus(ogg_packet* aPacket);
// Decodes a packet of Theora data, and inserts its frame into the
// video queue. May return NS_ERROR_OUT_OF_MEMORY. Caller must have obtained
// the reader's monitor. aTimeThreshold is the current playback position
@ -253,6 +258,14 @@ private:
// Decode state of the Vorbis bitstream we're decoding, if we have audio.
nsVorbisState* mVorbisState;
// Decode state of the Opus bitstream we're decoding, if we have one.
nsOpusState *mOpusState;
// Represents the user pref media.opus.enabled at the time our
// constructor was called. We can't check it dynamically because
// we're not on the main thread.
bool mOpusEnabled;
// Decode state of the Skeleton bitstream.
nsSkeletonState* mSkeletonState;

View File

@ -187,6 +187,9 @@ pref("media.raw.enabled", true);
#ifdef MOZ_OGG
pref("media.ogg.enabled", true);
#endif
#ifdef MOZ_OPUS
pref("media.opus.enabled", false);
#endif
#ifdef MOZ_WAVE
pref("media.wave.enabled", true);
#endif

View File

@ -432,6 +432,9 @@ static nsDefaultMimeTypeEntry defaultMimeEntries [] =
{ VIDEO_OGG, "ogg" },
{ APPLICATION_OGG, "ogg" },
{ AUDIO_OGG, "oga" },
#ifdef MOZ_OPUS
{ AUDIO_OGG, "opus" },
#endif
#endif
#ifdef MOZ_WEBM
{ VIDEO_WEBM, "webm" },
@ -505,6 +508,7 @@ static nsExtraMimeTypeEntry extraMimeEntries [] =
{ VIDEO_OGG, "ogg", "Ogg Video" },
{ APPLICATION_OGG, "ogg", "Ogg Video"},
{ AUDIO_OGG, "oga", "Ogg Audio" },
{ AUDIO_OGG, "opus", "Opus Audio" },
{ VIDEO_WEBM, "webm", "Web Media Video" },
{ AUDIO_WEBM, "webm", "Web Media Audio" },
{ VIDEO_RAW, "yuv", "Raw YUV Video" },