Bug 1102666 - Parse the edts block in MoofParser and offset composition time accordingly. r=ajones

Matt Woodrow 2014-11-26 18:00:31 +13:00
parent b181902fe0
commit 334bec207c
9 changed files with 120 additions and 38 deletions

View File

@@ -131,6 +131,7 @@ TrackConfig::Update(sp<MetaData>& aMetaData, const char* aMimeType)
// aMimeType points to a string from MediaDefs.cpp so we don't need to copy it
mime_type = aMimeType;
duration = FindInt64(aMetaData, kKeyDuration);
media_time = FindInt64(aMetaData, kKeyMediaTime);
mTrackId = FindInt32(aMetaData, kKeyTrackID);
crypto.Update(aMetaData);
}
@@ -215,11 +216,11 @@ MP4Sample::~MP4Sample()
}
void
MP4Sample::Update()
MP4Sample::Update(int64_t& aMediaTime)
{
sp<MetaData> m = mMediaBuffer->meta_data();
decode_timestamp = FindInt64(m, kKeyDecodingTime);
composition_timestamp = FindInt64(m, kKeyTime);
composition_timestamp = FindInt64(m, kKeyTime) - aMediaTime;
duration = FindInt64(m, kKeyDuration);
byte_offset = FindInt64(m, kKey64BitFileOffset);
is_sync_point = FindInt32(m, kKeyIsSyncFrame);
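
Note (not part of the commit): a minimal standalone sketch of the adjustment MP4Sample::Update() now applies, i.e. subtracting the edit-list media time from the raw composition timestamp so that presentation starts at zero. The AdjustCompositionTime helper and all numeric values are hypothetical illustrations, not identifiers or data from the tree.

#include <cstdint>
#include <cstdio>

// Hypothetical helper mirroring
// composition_timestamp = FindInt64(m, kKeyTime) - aMediaTime.
int64_t AdjustCompositionTime(int64_t aRawCompositionUs, int64_t aMediaTimeUs)
{
  return aRawCompositionUs - aMediaTimeUs;
}

int main()
{
  // Assumed example: an AAC track with a 2112-sample encoder delay at 44100 Hz
  // carries an edit-list media time of roughly 47891 microseconds.
  const int64_t mediaTimeUs = 47891;
  const int64_t rawCtsUs[] = {47891, 71111, 94331};  // raw composition times
  for (int64_t cts : rawCtsUs) {
    std::printf("raw=%lld us -> adjusted=%lld us\n",
                (long long)cts,
                (long long)AdjustCompositionTime(cts, mediaTimeUs));
  }
  return 0;
}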

View File

@@ -22,7 +22,7 @@ MoofParser::RebuildFragmentedIndex(const nsTArray<MediaByteRange>& aByteRanges)
mInitRange = MediaByteRange(0, box.Range().mEnd);
ParseMoov(box);
} else if (box.IsType("moof")) {
Moof moof(box, mTrex, mMdhd);
Moof moof(box, mTrex, mMdhd, mEdts);
if (!mMoofs.IsEmpty()) {
// Stitch time ranges together in the case of a (hopefully small) time
@@ -81,6 +81,8 @@ MoofParser::ParseTrak(Box& aBox)
if (!mTrex.mTrackId || tkhd.mTrackId == mTrex.mTrackId) {
ParseMdia(box, tkhd);
}
} else if (box.IsType("edts")) {
mEdts = Edts(box);
}
}
}
@@ -108,18 +110,18 @@ MoofParser::ParseMvex(Box& aBox)
}
}
Moof::Moof(Box& aBox, Trex& aTrex, Mdhd& aMdhd) :
Moof::Moof(Box& aBox, Trex& aTrex, Mdhd& aMdhd, Edts& aEdts) :
mRange(aBox.Range()), mMaxRoundingError(0)
{
for (Box box = aBox.FirstChild(); box.IsAvailable(); box = box.Next()) {
if (box.IsType("traf")) {
ParseTraf(box, aTrex, aMdhd);
ParseTraf(box, aTrex, aMdhd, aEdts);
}
}
}
void
Moof::ParseTraf(Box& aBox, Trex& aTrex, Mdhd& aMdhd)
Moof::ParseTraf(Box& aBox, Trex& aTrex, Mdhd& aMdhd, Edts& aEdts)
{
Tfhd tfhd(aTrex);
Tfdt tfdt;
@@ -132,7 +134,7 @@ Moof::ParseTraf(Box& aBox, Trex& aTrex, Mdhd& aMdhd)
}
} else if (box.IsType("trun")) {
if (!aTrex.mTrackId || tfhd.mTrackId == aTrex.mTrackId) {
ParseTrun(box, tfhd, tfdt, aMdhd);
ParseTrun(box, tfhd, tfdt, aMdhd, aEdts);
}
}
}
@@ -161,7 +163,7 @@ public:
};
void
Moof::ParseTrun(Box& aBox, Tfhd& aTfhd, Tfdt& aTfdt, Mdhd& aMdhd)
Moof::ParseTrun(Box& aBox, Tfhd& aTfhd, Tfdt& aTfdt, Mdhd& aMdhd, Edts& aEdts)
{
if (!aMdhd.mTimescale) {
return;
@@ -209,8 +211,8 @@ Moof::ParseTrun(Box& aBox, Tfhd& aTfhd, Tfdt& aTfdt, Mdhd& aMdhd)
offset += sampleSize;
sample.mCompositionRange = Interval<Microseconds>(
aMdhd.ToMicroseconds(decodeTime + ctsOffset),
aMdhd.ToMicroseconds(decodeTime + ctsOffset + sampleDuration));
aMdhd.ToMicroseconds(decodeTime + ctsOffset - aEdts.mMediaStart),
aMdhd.ToMicroseconds(decodeTime + ctsOffset + sampleDuration - aEdts.mMediaStart));
decodeTime += sampleDuration;
sample.mSync = !(sampleFlags & 0x1010000);
@@ -328,4 +330,35 @@ Tfdt::Tfdt(Box& aBox)
}
reader->DiscardRemaining();
}
Edts::Edts(Box& aBox)
: mMediaStart(0)
{
Box child = aBox.FirstChild();
if (!child.IsType("elst")) {
return;
}
BoxReader reader(child);
uint32_t flags = reader->ReadU32();
uint8_t version = flags >> 24;
uint32_t entryCount = reader->ReadU32();
NS_ASSERTION(entryCount == 1, "Can't handle videos with multiple edits");
if (entryCount != 1) {
reader->DiscardRemaining();
return;
}
uint64_t segment_duration;
if (version == 1) {
segment_duration = reader->ReadU64();
mMediaStart = reader->Read64();
} else {
segment_duration = reader->ReadU32();
mMediaStart = reader->Read32();
}
NS_ASSERTION(segment_duration == 0, "Can't handle edits with fixed durations");
reader->DiscardRemaining();
}
}
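
Note (not part of the commit): a minimal standalone sketch of the elst payload that the new Edts::Edts() walks, assuming the standard ISO BMFF layout: a 32-bit version/flags word, a 32-bit entry count, then per entry a segment duration and media time whose widths depend on the version. It uses a plain byte vector and hand-rolled big-endian reads instead of BoxReader, so the helper names and example bytes are illustrative only.

#include <cstdint>
#include <cstdio>
#include <vector>

// Big-endian helpers standing in for BoxReader::ReadU32()/Read64().
static uint32_t ReadU32(const std::vector<uint8_t>& b, size_t& p)
{
  uint32_t v = (uint32_t(b[p]) << 24) | (uint32_t(b[p + 1]) << 16) |
               (uint32_t(b[p + 2]) << 8) | uint32_t(b[p + 3]);
  p += 4;
  return v;
}

static uint64_t ReadU64(const std::vector<uint8_t>& b, size_t& p)
{
  uint64_t v = uint64_t(ReadU32(b, p)) << 32;
  v |= ReadU32(b, p);
  return v;
}

// Parses an elst payload (everything after the box header) and returns the
// media time of the first edit, mirroring what Edts::Edts() extracts.
static int64_t ParseElstMediaStart(const std::vector<uint8_t>& payload)
{
  size_t pos = 0;
  uint32_t versionAndFlags = ReadU32(payload, pos);
  uint8_t version = versionAndFlags >> 24;
  uint32_t entryCount = ReadU32(payload, pos);
  if (entryCount != 1) {
    return 0;  // like the patch, only a single edit is handled
  }
  if (version == 1) {
    ReadU64(payload, pos);                   // segment_duration (64-bit)
    return int64_t(ReadU64(payload, pos));   // media_time (64-bit)
  }
  ReadU32(payload, pos);                     // segment_duration (32-bit)
  return int32_t(ReadU32(payload, pos));     // media_time (32-bit, signed)
}

int main()
{
  // Hypothetical version 0 elst with one entry:
  // segment_duration = 0, media_time = 2112, media_rate = 1.0.
  std::vector<uint8_t> elst = {0x00, 0x00, 0x00, 0x00,   // version=0, flags=0
                               0x00, 0x00, 0x00, 0x01,   // entry_count=1
                               0x00, 0x00, 0x00, 0x00,   // segment_duration
                               0x00, 0x00, 0x08, 0x40,   // media_time=2112
                               0x00, 0x01, 0x00, 0x00};  // media_rate
  std::printf("media_time = %lld\n", (long long)ParseElstMediaStart(elst));
  return 0;
}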

View File

@@ -96,6 +96,16 @@ public:
return mozilla::BigEndian::readUint64(ptr);
}
int64_t Read64()
{
auto ptr = Read(8);
if (!ptr) {
MOZ_ASSERT(false);
return 0;
}
return mozilla::BigEndian::readInt64(ptr);
}
const uint8_t* Read(size_t aCount)
{
if (aCount > mRemaining) {

View File

@@ -78,10 +78,11 @@ public:
class TrackConfig
{
public:
TrackConfig() : mime_type(nullptr), mTrackId(0), duration(0) {}
TrackConfig() : mime_type(nullptr), mTrackId(0), duration(0), media_time(0) {}
const char* mime_type;
uint32_t mTrackId;
int64_t duration;
int64_t media_time;
CryptoTrack crypto;
void Update(stagefright::sp<stagefright::MetaData>& aMetaData,
@@ -144,7 +145,7 @@ class MP4Sample
public:
MP4Sample();
~MP4Sample();
void Update();
void Update(int64_t& aMediaTime);
void Pad(size_t aPaddingBytes);
stagefright::MediaBuffer* mMediaBuffer;

View File

@@ -96,6 +96,15 @@ public:
uint64_t mBaseMediaDecodeTime;
};
class Edts
{
public:
Edts() : mMediaStart(0) {}
explicit Edts(Box& aBox);
int64_t mMediaStart;
};
struct Sample
{
mozilla::MediaByteRange mByteRange;
@@ -106,7 +115,7 @@ struct Sample
class Moof
{
public:
Moof(Box& aBox, Trex& aTrex, Mdhd& aMdhd);
Moof(Box& aBox, Trex& aTrex, Mdhd& aMdhd, Edts& aEdts);
void FixRounding(const Moof& aMoof);
mozilla::MediaByteRange mRange;
@@ -115,8 +124,8 @@ public:
nsTArray<Sample> mIndex;
private:
void ParseTraf(Box& aBox, Trex& aTrex, Mdhd& aMdhd);
void ParseTrun(Box& aBox, Tfhd& aTfhd, Tfdt& aTfdt, Mdhd& aMdhd);
void ParseTraf(Box& aBox, Trex& aTrex, Mdhd& aMdhd, Edts& aEdts);
void ParseTrun(Box& aBox, Tfhd& aTfhd, Tfdt& aTfdt, Mdhd& aMdhd, Edts& aEdts);
uint64_t mMaxRoundingError;
};
@@ -146,6 +155,7 @@ public:
Mdhd mMdhd;
Trex mTrex;
Tfdt mTfdt;
Edts mEdts;
nsTArray<Moof> mMoofs;
};
}

View File

@@ -173,7 +173,7 @@ MP4Demuxer::DemuxAudioSample()
return nullptr;
}
sample->Update();
sample->Update(mAudioConfig.media_time);
return sample.forget();
}
@@ -190,7 +190,7 @@ MP4Demuxer::DemuxVideoSample()
return nullptr;
}
sample->Update();
sample->Update(mVideoConfig.media_time);
sample->prefix_data = mVideoConfig.annex_b;
return sample.forget();

View File

@@ -77,6 +77,7 @@ enum {
kKeyIsDRM = 'idrm', // int32_t (bool)
kKeyEncoderDelay = 'encd', // int32_t (frames)
kKeyEncoderPadding = 'encp', // int32_t (frames)
kKeyMediaTime = 'mtme', // int64_t (usecs)
kKeyAlbum = 'albu', // cstring
kKeyArtist = 'arti', // cstring

View File

@@ -838,6 +838,8 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
track->includes_expensive_metadata = false;
track->skipTrack = false;
track->timescale = 0;
track->segment_duration = 0;
track->media_time = 0;
track->meta->setCString(kKeyMIMEType, "application/octet-stream");
}
@@ -910,12 +912,10 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
ALOGW("ignoring edit list because timescale is 0");
} else {
off64_t entriesoffset = data_offset + 8;
uint64_t segment_duration;
int64_t media_time;
if (version == 1) {
if (!mDataSource->getUInt64(entriesoffset, &segment_duration) ||
!mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&media_time)) {
if (!mDataSource->getUInt64(entriesoffset, &mLastTrack->segment_duration) ||
!mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&mLastTrack->media_time)) {
return ERROR_IO;
}
} else if (version == 0) {
@@ -925,28 +925,14 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
!mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) {
return ERROR_IO;
}
segment_duration = sd;
media_time = mt;
mLastTrack->segment_duration = sd;
mLastTrack->media_time = mt;
} else {
return ERROR_IO;
}
uint64_t halfscale = mHeaderTimescale / 2;
segment_duration = (segment_duration * 1000000 + halfscale)/ mHeaderTimescale;
media_time = (media_time * 1000000 + halfscale) / mHeaderTimescale;
storeEditList();
int64_t duration;
int32_t samplerate;
if (mLastTrack->meta->findInt64(kKeyDuration, &duration) &&
mLastTrack->meta->findInt32(kKeySampleRate, &samplerate)) {
int64_t delay = (media_time * samplerate + 500000) / 1000000;
mLastTrack->meta->setInt32(kKeyEncoderDelay, delay);
int64_t paddingus = duration - (segment_duration + media_time);
int64_t paddingsamples = (paddingus * samplerate + 500000) / 1000000;
mLastTrack->meta->setInt32(kKeyEncoderPadding, paddingsamples);
}
}
*offset += chunk_size;
break;
@@ -1115,6 +1101,10 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
mLastTrack->timescale = ntohl(timescale);
// Now that we've parsed the media timescale, we can interpret
// the edit list data.
storeEditList();
int64_t duration = 0;
if (version == 1) {
if (mDataSource->readAt(
@@ -1812,6 +1802,35 @@ status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
return OK;
}
void MPEG4Extractor::storeEditList()
{
if (mHeaderTimescale == 0 ||
mLastTrack->timescale == 0) {
return;
}
uint64_t segment_duration = (mLastTrack->segment_duration * 1000000)/ mHeaderTimescale;
// media_time is measured in media time scale units.
int64_t media_time = (mLastTrack->media_time * 1000000) / mLastTrack->timescale;
if (segment_duration == 0) {
mLastTrack->meta->setInt64(kKeyMediaTime, media_time);
}
int64_t duration;
int32_t samplerate;
if (mLastTrack->meta->findInt64(kKeyDuration, &duration) &&
mLastTrack->meta->findInt32(kKeySampleRate, &samplerate)) {
int64_t delay = (media_time * samplerate + 500000) / 1000000;
mLastTrack->meta->setInt32(kKeyEncoderDelay, delay);
int64_t paddingus = duration - (segment_duration + media_time);
int64_t paddingsamples = (paddingus * samplerate + 500000) / 1000000;
mLastTrack->meta->setInt32(kKeyEncoderPadding, paddingsamples);
}
}
status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) {
ALOGV("MPEG4Extractor::parseSegmentIndex");

View File

@@ -86,6 +86,11 @@ private:
Track *next;
sp<MetaData> meta;
uint32_t timescale;
// Temporary storage for elst until we've
// parsed mdhd and can interpret them.
uint64_t segment_duration;
int64_t media_time;
sp<SampleTable> sampleTable;
bool includes_expensive_metadata;
bool skipTrack;
@@ -141,6 +146,8 @@ private:
status_t parseSegmentIndex(off64_t data_offset, size_t data_size);
void storeEditList();
Track *findTrackByMimePrefix(const char *mimePrefix);
MPEG4Extractor(const MPEG4Extractor &);
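
Note (not part of the commit): the delay/padding arithmetic that moved into storeEditList() above is easier to follow with concrete numbers. The sketch below mirrors that arithmetic; the timescales, edit-list values and duration are hypothetical example inputs, not data from the bug.

#include <cstdint>
#include <cstdio>

// Mirrors the arithmetic in the new storeEditList(): convert the edit-list
// values to microseconds, then derive encoder delay and padding in samples.
// All input values below are hypothetical examples.
int main()
{
  const uint32_t headerTimescale = 1000;    // movie timescale (mvhd)
  const uint32_t trackTimescale = 44100;    // media timescale (mdhd)
  const int32_t samplerate = 44100;         // kKeySampleRate

  const uint64_t segmentDuration = 9977;    // elst, movie timescale units
  const int64_t mediaTime = 2112;           // elst, media timescale units
  const int64_t durationUs = 10054240;      // kKeyDuration, microseconds

  const uint64_t segmentDurationUs =
      (segmentDuration * 1000000) / headerTimescale;
  const int64_t mediaTimeUs = (mediaTime * 1000000) / trackTimescale;

  const int64_t delay = (mediaTimeUs * samplerate + 500000) / 1000000;
  const int64_t paddingUs =
      durationUs - (int64_t)(segmentDurationUs + mediaTimeUs);
  const int64_t paddingSamples = (paddingUs * samplerate + 500000) / 1000000;

  // With these inputs: delay = 2112 samples, padding = 1294 samples.
  std::printf("delay=%lld samples, padding=%lld samples\n",
              (long long)delay, (long long)paddingSamples);
  return 0;
}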