Bug 870400 - Calculate audio timestamps for WMF based on audio frames played, rather than trusting the container's timestamps. r=padenot

2024-10-20 16:55:40 +00:00 · 2013-05-31 11:34:52 +12:00 · 2013-05-31 11:34:52 +12:00 · f24ba25bbd
commit f24ba25bbd
parent 7f1070c9de
2 changed files with 72 additions and 5 deletions
--- a/content/media/wmf/WMFReader.cpp
+++ b/content/media/wmf/WMFReader.cpp
@ -48,10 +48,13 @@ WMFReader::WMFReader(AbstractMediaDecoder* aDecoder)
    mVideoWidth(0),
    mVideoHeight(0),
    mVideoStride(0),
+    mAudioFrameSum(0),
+    mAudioFrameOffset(0),
    mHasAudio(false),
    mHasVideo(false),
    mCanSeek(false),
    mUseHwAccel(false),
+    mMustRecaptureAudioPosition(true),
    mIsMP3Enabled(WMFDecoder::IsMP3Supported())
 {
  NS_ASSERTION(NS_IsMainThread(), "Must be on main thread.");
@ -605,6 +608,31 @@ GetSampleDuration(IMFSample* aSample)
  return HNsToUsecs(duration);
 }

+HRESULT
+HNsToFrames(int64_t aHNs, uint32_t aRate, int64_t* aOutFrames)
+{
+  MOZ_ASSERT(aOutFrames);
+  const int64_t HNS_PER_S = USECS_PER_S * 10;
+  CheckedInt<int64_t> i = aHNs;
+  i *= aRate;
+  i /= HNS_PER_S;
+  NS_ENSURE_TRUE(i.isValid(), E_FAIL);
+  *aOutFrames = i.value();
+  return S_OK;
+}
+
+HRESULT
+FramesToUsecs(int64_t aSamples, uint32_t aRate, int64_t* aOutUsecs)
+{
+  MOZ_ASSERT(aOutUsecs);
+  CheckedInt<int64_t> i = aSamples;
+  i *= USECS_PER_S;
+  i /= aRate;
+  NS_ENSURE_TRUE(i.isValid(), E_FAIL);
+  *aOutUsecs = i.value();
+  return S_OK;
+}
+
 bool
 WMFReader::DecodeAudioData()
 {
@ -660,11 +688,33 @@ WMFReader::DecodeAudioData()
  memcpy(pcmSamples.get(), data, currentLength);
  buffer->Unlock();

-  int64_t offset = mDecoder->GetResource()->Tell();
-  int64_t timestamp = HNsToUsecs(timestampHns);
-  int64_t duration = GetSampleDuration(sample);
+  // We calculate the timestamp and the duration based on the number of audio
+  // frames we've already played. We don't trust the timestamp stored on the
+  // IMFSample, as sometimes it's wrong, possibly due to buggy encoders?

-  mAudioQueue.Push(new AudioData(offset,
+  // If this sample block comes after a discontinuity (i.e. a gap or seek)
+  // reset the frame counters, and capture the timestamp. Future timestamps
+  // will be offset from this block's timestamp.
+  UINT32 discontinuity = false;
+  sample->GetUINT32(MFSampleExtension_Discontinuity, &discontinuity);
+  if (mMustRecaptureAudioPosition || discontinuity) {
+    mAudioFrameSum = 0;
+    hr = HNsToFrames(timestampHns, mAudioRate, &mAudioFrameOffset);
+    NS_ENSURE_TRUE(SUCCEEDED(hr), false);
+    mMustRecaptureAudioPosition = false;
+  }
+
+  int64_t timestamp;
+  hr = FramesToUsecs(mAudioFrameOffset + mAudioFrameSum, mAudioRate, &timestamp);
+  NS_ENSURE_TRUE(SUCCEEDED(hr), false);
+
+  mAudioFrameSum += numFrames;
+
+  int64_t duration;
+  hr = FramesToUsecs(numFrames, mAudioRate, &duration);
+  NS_ENSURE_TRUE(SUCCEEDED(hr), false);
+
+  mAudioQueue.Push(new AudioData(mDecoder->GetResource()->Tell(),
                                 timestamp,
                                 duration,
                                 numFrames,
@ -901,7 +951,7 @@ WMFReader::DecodeVideoFrame(bool &aKeyframeSkip,

  #ifdef LOG_SAMPLE_DECODE
  LOG("Decoded video sample timestamp=%lld duration=%lld stride=%d height=%u flags=%u",
-      timestamp, duration, stride, mVideoHeight, flags);
+      timestamp, duration, mVideoStride, mVideoHeight, flags);
  #endif

  if ((flags & MF_SOURCE_READERF_ENDOFSTREAM)) {
@ -930,6 +980,12 @@ WMFReader::Seek(int64_t aTargetUs,
  nsresult rv = ResetDecode();
  NS_ENSURE_SUCCESS(rv, rv);

+  // Mark that we must recapture the audio frame count from the next sample.
+  // WMF doesn't set a discontinuity marker when we seek to time 0, so we
+  // must remember to recapture the audio frame offset and reset the frame
+  // sum on the next audio packet we decode.
+  mMustRecaptureAudioPosition = true;
+
  AutoPropVar var;
  HRESULT hr = InitPropVariantFromInt64(UsecsToHNs(aTargetUs), &var);
  NS_ENSURE_TRUE(SUCCEEDED(hr), NS_ERROR_FAILURE);
--- a/content/media/wmf/WMFReader.h
+++ b/content/media/wmf/WMFReader.h
@ -91,6 +91,17 @@ private:
  uint32_t mVideoHeight;
  uint32_t mVideoStride;

+  // The offset, in audio frames, at which playback started since the
+  // last discontinuity.
+  int64_t mAudioFrameOffset;
+  // The number of audio frames that we've played since the last
+  // discontinuity.
+  int64_t mAudioFrameSum;
+  // True if we need to re-initialize mAudioFrameOffset and mAudioFrameSum
+  // from the next audio packet we decode. This happens after a seek, since
+  // WMF doesn't mark a stream as having a discontinuity after a seek(0).
+  bool mMustRecaptureAudioPosition;
+
  bool mHasAudio;
  bool mHasVideo;
  bool mCanSeek;