Bug 1069660 - Factor Vorbis and Opus decoding out.

2024-10-14 13:55:43 +00:00 · 2014-09-30 14:14:40 +13:00 · 2014-09-30 14:14:40 +13:00 · 672372df86
commit 672372df86
parent a0648a0cdf
2 changed files with 208 additions and 183 deletions
--- a/content/media/webm/WebMReader.cpp
+++ b/content/media/webm/WebMReader.cpp
@ -531,7 +531,6 @@ bool WebMReader::DecodeAudioPacket(nestegg_packet* aPacket, int64_t aOffset)
    return false;
  }

-  const uint32_t rate = mInfo.mAudio.mRate;
  uint64_t tstamp_usecs = tstamp / NS_PER_USEC;
  if (mAudioStartUsec == -1) {
    // This is the first audio chunk. Assume the start time of our decode
@ -542,8 +541,8 @@ bool WebMReader::DecodeAudioPacket(nestegg_packet* aPacket, int64_t aOffset)
  // the previous audio chunk, we need to increment the packet count so that
  // the vorbis decode doesn't use data from before the gap to help decode
  // from after the gap.
-  CheckedInt64 tstamp_frames = UsecsToFrames(tstamp_usecs, rate);
-  CheckedInt64 decoded_frames = UsecsToFrames(mAudioStartUsec, rate);
+  CheckedInt64 tstamp_frames = UsecsToFrames(tstamp_usecs, mInfo.mAudio.mRate);
+  CheckedInt64 decoded_frames = UsecsToFrames(mAudioStartUsec, mInfo.mAudio.mRate);
  if (!tstamp_frames.isValid() || !decoded_frames.isValid()) {
    NS_WARNING("Int overflow converting WebM times to frames");
    return false;
@ -555,7 +554,7 @@ bool WebMReader::DecodeAudioPacket(nestegg_packet* aPacket, int64_t aOffset)
  }
  if (tstamp_frames.value() > decoded_frames.value()) {
 #ifdef DEBUG
-    CheckedInt64 usecs = FramesToUsecs(tstamp_frames.value() - decoded_frames.value(), rate);
+    CheckedInt64 usecs = FramesToUsecs(tstamp_frames.value() - decoded_frames.value(), mInfo.mAudio.mRate);
    LOG(PR_LOG_DEBUG, ("WebMReader detected gap of %lld, %lld frames, in audio stream\n",
                       usecs.isValid() ? usecs.value() : -1,
                       tstamp_frames.value() - decoded_frames.value()));
@ -574,198 +573,218 @@ bool WebMReader::DecodeAudioPacket(nestegg_packet* aPacket, int64_t aOffset)
      return false;
    }
    if (mAudioCodec == NESTEGG_CODEC_VORBIS) {
-      ogg_packet opacket = InitOggPacket(data, length, false, false, -1);
-
-      if (vorbis_synthesis(&mVorbisBlock, &opacket) != 0) {
+      if (!DecodeVorbis(data, length, aOffset, tstamp_usecs, &total_frames)) {
        return false;
      }
-
-      if (vorbis_synthesis_blockin(&mVorbisDsp,
-                                   &mVorbisBlock) != 0) {
-        return false;
-      }
-
-      VorbisPCMValue** pcm = 0;
-      int32_t frames = vorbis_synthesis_pcmout(&mVorbisDsp, &pcm);
-      // If the first packet of audio in the media produces no data, we
-      // still need to produce an AudioData for it so that the correct media
-      // start time is calculated.  Otherwise we'd end up with a media start
-      // time derived from the timecode of the first packet that produced
-      // data.
-      if (frames == 0 && mAudioFrames == 0) {
-        AudioQueue().Push(new AudioData(aOffset, tstamp_usecs, 0, 0, nullptr, mChannels, rate));
-      }
-      while (frames > 0) {
-        nsAutoArrayPtr<AudioDataValue> buffer(new AudioDataValue[frames * mChannels]);
-        for (uint32_t j = 0; j < mChannels; ++j) {
-          VorbisPCMValue* channel = pcm[j];
-          for (uint32_t i = 0; i < uint32_t(frames); ++i) {
-            buffer[i*mChannels + j] = MOZ_CONVERT_VORBIS_SAMPLE(channel[i]);
-          }
-        }
-
-        CheckedInt64 duration = FramesToUsecs(frames, rate);
-        if (!duration.isValid()) {
-          NS_WARNING("Int overflow converting WebM audio duration");
-          return false;
-        }
-        CheckedInt64 total_duration = FramesToUsecs(total_frames, rate);
-        if (!total_duration.isValid()) {
-          NS_WARNING("Int overflow converting WebM audio total_duration");
-          return false;
-        }
-
-        CheckedInt64 time = total_duration + tstamp_usecs;
-        if (!time.isValid()) {
-          NS_WARNING("Int overflow adding total_duration and tstamp_usecs");
-          return false;
-        };
-
-        total_frames += frames;
-        AudioQueue().Push(new AudioData(aOffset,
-                                        time.value(),
-                                        duration.value(),
-                                        frames,
-                                        buffer.forget(),
-                                        mChannels,
-                                        rate));
-        mAudioFrames += frames;
-        if (vorbis_synthesis_read(&mVorbisDsp, frames) != 0) {
-          return false;
-        }
-
-        frames = vorbis_synthesis_pcmout(&mVorbisDsp, &pcm);
-      }
    } else if (mAudioCodec == NESTEGG_CODEC_OPUS) {
 #ifdef MOZ_OPUS
-      uint32_t channels = mOpusParser->mChannels;
-
-      // Maximum value is 63*2880, so there's no chance of overflow.
-      int32_t frames_number = opus_packet_get_nb_frames(data, length);
-
-      if (frames_number <= 0)
-        return false; // Invalid packet header.
-      int32_t samples = opus_packet_get_samples_per_frame(data,
-                                                          (opus_int32) rate);
-      int32_t frames = frames_number*samples;
-
-      // A valid Opus packet must be between 2.5 and 120 ms long.
-      if (frames < 120 || frames > 5760)
+      if (!DecodeOpus(data, length, aOffset, tstamp_usecs, aPacket)) {
        return false;
-      nsAutoArrayPtr<AudioDataValue> buffer(new AudioDataValue[frames * channels]);
-
-      // Decode to the appropriate sample type.
-#ifdef MOZ_SAMPLE_TYPE_FLOAT32
-      int ret = opus_multistream_decode_float(mOpusDecoder,
-                                              data, length,
-                                              buffer, frames, false);
-#else
-      int ret = opus_multistream_decode(mOpusDecoder,
-                                        data, length,
-                                        buffer, frames, false);
-#endif
-      if (ret < 0)
-        return false;
-      NS_ASSERTION(ret == frames, "Opus decoded too few audio samples");
-      CheckedInt64 startTime = tstamp_usecs;
-
-      // Trim the initial frames while the decoder is settling.
-      if (mSkip > 0) {
-        int32_t skipFrames = std::min(mSkip, frames);
-        if (skipFrames == frames) {
-          // discard the whole packet
-          mSkip -= frames;
-          LOG(PR_LOG_DEBUG, ("Opus decoder skipping %d frames"
-                             " (whole packet)", frames));
-          return true;
-        }
-        int32_t keepFrames = frames - skipFrames;
-        int samples = keepFrames * channels;
-        nsAutoArrayPtr<AudioDataValue> trimBuffer(new AudioDataValue[samples]);
-        PodCopy(trimBuffer.get(), buffer.get() + skipFrames*channels, samples);
-        startTime = startTime + FramesToUsecs(skipFrames, rate);
-        frames = keepFrames;
-        buffer = trimBuffer;
-
-        mSkip -= skipFrames;
-        LOG(PR_LOG_DEBUG, ("Opus decoder skipping %d frames", skipFrames));
-      }
-
-      int64_t discardPadding = 0;
-      r = nestegg_packet_discard_padding(aPacket, &discardPadding);
-      if (discardPadding > 0) {
-        CheckedInt64 discardFrames = UsecsToFrames(discardPadding / NS_PER_USEC, rate);
-        if (!discardFrames.isValid()) {
-          NS_WARNING("Int overflow in DiscardPadding");
-          return false;
-        }
-        if (discardFrames.value() >= frames) {
-          LOG(PR_LOG_DEBUG, ("Opus decoder discarding whole packet"
-                             " (%d frames) as padding (%lld discarded)",
-                             frames, discardFrames.value()));
-          return true;
-        }
-        int32_t keepFrames = frames - discardFrames.value();
-        int32_t samples = keepFrames * channels;
-        nsAutoArrayPtr<AudioDataValue> trimBuffer(new AudioDataValue[samples]);
-        PodCopy(trimBuffer.get(), buffer.get(), samples);
-        frames = keepFrames;
-        buffer = trimBuffer;
-      }
-
-      // Apply the header gain if one was specified.
-#ifdef MOZ_SAMPLE_TYPE_FLOAT32
-      if (mOpusParser->mGain != 1.0f) {
-        float gain = mOpusParser->mGain;
-        int samples = frames * channels;
-        for (int i = 0; i < samples; i++) {
-          buffer[i] *= gain;
-        }
-      }
-#else
-      if (mOpusParser->mGain_Q16 != 65536) {
-        int64_t gain_Q16 = mOpusParser->mGain_Q16;
-        int samples = frames * channels;
-        for (int i = 0; i < samples; i++) {
-          int32_t val = static_cast<int32_t>((gain_Q16*buffer[i] + 32768)>>16);
-          buffer[i] = static_cast<AudioDataValue>(MOZ_CLIP_TO_15(val));
-        }
      }
 #endif
-
-      // No channel mapping for more than 8 channels.
-      if (channels > 8) {
-        return false;
-      }
-
-      CheckedInt64 duration = FramesToUsecs(frames, rate);
-      if (!duration.isValid()) {
-        NS_WARNING("Int overflow converting WebM audio duration");
-        return false;
-      }
-      CheckedInt64 time = startTime - mCodecDelay;
-      if (!time.isValid()) {
-        NS_WARNING("Int overflow shifting tstamp by codec delay");
-        return false;
-      };
-      AudioQueue().Push(new AudioData(mDecoder->GetResource()->Tell(),
-                                      time.value(),
-                                      duration.value(),
-                                      frames,
-                                      buffer.forget(),
-                                      mChannels,
-                                      rate));
-
-      mAudioFrames += frames;
-#else
-      return false;
-#endif /* MOZ_OPUS */
    }
  }

  return true;
 }

+bool WebMReader::DecodeVorbis(unsigned char* aData, size_t aLength,
+                              int64_t aOffset, uint64_t aTstampUsecs, int32_t* aTotalFrames)
+{
+  ogg_packet opacket = InitOggPacket(aData, aLength, false, false, -1);
+
+  if (vorbis_synthesis(&mVorbisBlock, &opacket) != 0) {
+    return false;
+  }
+
+  if (vorbis_synthesis_blockin(&mVorbisDsp,
+                               &mVorbisBlock) != 0) {
+    return false;
+  }
+
+  VorbisPCMValue** pcm = 0;
+  int32_t frames = vorbis_synthesis_pcmout(&mVorbisDsp, &pcm);
+  // If the first packet of audio in the media produces no data, we
+  // still need to produce an AudioData for it so that the correct media
+  // start time is calculated.  Otherwise we'd end up with a media start
+  // time derived from the timecode of the first packet that produced
+  // data.
+  if (frames == 0 && mAudioFrames == 0) {
+    AudioQueue().Push(new AudioData(aOffset, aTstampUsecs, 0, 0, nullptr, mChannels, mInfo.mAudio.mRate));
+  }
+  while (frames > 0) {
+    nsAutoArrayPtr<AudioDataValue> buffer(new AudioDataValue[frames * mChannels]);
+    for (uint32_t j = 0; j < mChannels; ++j) {
+      VorbisPCMValue* channel = pcm[j];
+      for (uint32_t i = 0; i < uint32_t(frames); ++i) {
+        buffer[i*mChannels + j] = MOZ_CONVERT_VORBIS_SAMPLE(channel[i]);
+      }
+    }
+
+    CheckedInt64 duration = FramesToUsecs(frames, mInfo.mAudio.mRate);
+    if (!duration.isValid()) {
+      NS_WARNING("Int overflow converting WebM audio duration");
+      return false;
+    }
+    CheckedInt64 total_duration = FramesToUsecs(*aTotalFrames, mInfo.mAudio.mRate);
+    if (!total_duration.isValid()) {
+      NS_WARNING("Int overflow converting WebM audio total_duration");
+      return false;
+    }
+
+    CheckedInt64 time = total_duration + aTstampUsecs;
+    if (!time.isValid()) {
+      NS_WARNING("Int overflow adding total_duration and aTstampUsecs");
+      return false;
+    };
+
+    *aTotalFrames += frames;
+    AudioQueue().Push(new AudioData(aOffset,
+                                    time.value(),
+                                    duration.value(),
+                                    frames,
+                                    buffer.forget(),
+                                    mChannels,
+                                    mInfo.mAudio.mRate));
+    mAudioFrames += frames;
+    if (vorbis_synthesis_read(&mVorbisDsp, frames) != 0) {
+      return false;
+    }
+
+    frames = vorbis_synthesis_pcmout(&mVorbisDsp, &pcm);
+  }
+
+  return true;
+}
+
+#ifdef MOZ_OPUS
+bool WebMReader::DecodeOpus(unsigned char* aData, size_t aLength,
+                            int64_t aOffset, uint64_t aTstampUsecs, nestegg_packet* aPacket)
+{
+  uint32_t channels = mOpusParser->mChannels;
+
+  // Maximum value is 63*2880, so there's no chance of overflow.
+  int32_t frames_number = opus_packet_get_nb_frames(aData, aLength);
+
+  if (frames_number <= 0)
+    return false; // Invalid packet header.
+  int32_t samples = opus_packet_get_samples_per_frame(aData,
+                                                      (opus_int32) mInfo.mAudio.mRate);
+  int32_t frames = frames_number*samples;
+
+  // A valid Opus packet must be between 2.5 and 120 ms long.
+  if (frames < 120 || frames > 5760)
+    return false;
+  nsAutoArrayPtr<AudioDataValue> buffer(new AudioDataValue[frames * channels]);
+
+  // Decode to the appropriate sample type.
+#ifdef MOZ_SAMPLE_TYPE_FLOAT32
+  int ret = opus_multistream_decode_float(mOpusDecoder,
+                                          aData, aLength,
+                                          buffer, frames, false);
+#else
+  int ret = opus_multistream_decode(mOpusDecoder,
+                                    aData, aLength,
+                                    buffer, frames, false);
+#endif
+  if (ret < 0)
+    return false;
+  NS_ASSERTION(ret == frames, "Opus decoded too few audio samples");
+  CheckedInt64 startTime = aTstampUsecs;
+
+  // Trim the initial frames while the decoder is settling.
+  if (mSkip > 0) {
+    int32_t skipFrames = std::min(mSkip, frames);
+    if (skipFrames == frames) {
+      // discard the whole packet
+      mSkip -= frames;
+      LOG(PR_LOG_DEBUG, ("Opus decoder skipping %d frames"
+                         " (whole packet)", frames));
+      return true;
+    }
+    int32_t keepFrames = frames - skipFrames;
+    int samples = keepFrames * channels;
+    nsAutoArrayPtr<AudioDataValue> trimBuffer(new AudioDataValue[samples]);
+    PodCopy(trimBuffer.get(), buffer.get() + skipFrames*channels, samples);
+    startTime = startTime + FramesToUsecs(skipFrames, mInfo.mAudio.mRate);
+    frames = keepFrames;
+    buffer = trimBuffer;
+
+    mSkip -= skipFrames;
+    LOG(PR_LOG_DEBUG, ("Opus decoder skipping %d frames", skipFrames));
+  }
+
+  int64_t discardPadding = 0;
+  (void) nestegg_packet_discard_padding(aPacket, &discardPadding);
+  if (discardPadding > 0) {
+    CheckedInt64 discardFrames = UsecsToFrames(discardPadding / NS_PER_USEC, mInfo.mAudio.mRate);
+    if (!discardFrames.isValid()) {
+      NS_WARNING("Int overflow in DiscardPadding");
+      return false;
+    }
+    if (discardFrames.value() >= frames) {
+      LOG(PR_LOG_DEBUG, ("Opus decoder discarding whole packet"
+                         " (%d frames) as padding (%lld discarded)",
+                         frames, discardFrames.value()));
+      return true;
+    }
+    int32_t keepFrames = frames - discardFrames.value();
+    int32_t samples = keepFrames * channels;
+    nsAutoArrayPtr<AudioDataValue> trimBuffer(new AudioDataValue[samples]);
+    PodCopy(trimBuffer.get(), buffer.get(), samples);
+    frames = keepFrames;
+    buffer = trimBuffer;
+  }
+
+  // Apply the header gain if one was specified.
+#ifdef MOZ_SAMPLE_TYPE_FLOAT32
+  if (mOpusParser->mGain != 1.0f) {
+    float gain = mOpusParser->mGain;
+    int samples = frames * channels;
+    for (int i = 0; i < samples; i++) {
+      buffer[i] *= gain;
+    }
+  }
+#else
+  if (mOpusParser->mGain_Q16 != 65536) {
+    int64_t gain_Q16 = mOpusParser->mGain_Q16;
+    int samples = frames * channels;
+    for (int i = 0; i < samples; i++) {
+      int32_t val = static_cast<int32_t>((gain_Q16*buffer[i] + 32768)>>16);
+      buffer[i] = static_cast<AudioDataValue>(MOZ_CLIP_TO_15(val));
+    }
+  }
+#endif
+
+  // No channel mapping for more than 8 channels.
+  if (channels > 8) {
+    return false;
+  }
+
+  CheckedInt64 duration = FramesToUsecs(frames, mInfo.mAudio.mRate);
+  if (!duration.isValid()) {
+    NS_WARNING("Int overflow converting WebM audio duration");
+    return false;
+  }
+  CheckedInt64 time = startTime - mCodecDelay;
+  if (!time.isValid()) {
+    NS_WARNING("Int overflow shifting tstamp by codec delay");
+    return false;
+  };
+  AudioQueue().Push(new AudioData(aOffset,
+                                  time.value(),
+                                  duration.value(),
+                                  frames,
+                                  buffer.forget(),
+                                  mChannels,
+                                  mInfo.mAudio.mRate));
+
+  mAudioFrames += frames;
+
+  return true;
+}
+#endif /* MOZ_OPUS */
+
 nsReturnRef<NesteggPacketHolder> WebMReader::NextPacket(TrackType aTrackType)
 {
  // The packet queue that packets will be pushed on if they
--- a/content/media/webm/WebMReader.h
+++ b/content/media/webm/WebMReader.h
@ -176,6 +176,12 @@ protected:
  // must be held during this call. The caller is responsible for freeing
  // aPacket.
  bool DecodeAudioPacket(nestegg_packet* aPacket, int64_t aOffset);
+  bool DecodeVorbis(unsigned char* aData, size_t aLength,
+                    int64_t aOffset, uint64_t aTstampUsecs, int32_t* aTotalFrames);
+#ifdef MOZ_OPUS
+  bool DecodeOpus(unsigned char* aData, size_t aLength,
+                  int64_t aOffset, uint64_t aTstampUsecs, nestegg_packet* aPacket);
+#endif

  // Release context and set to null. Called when an error occurs during
  // reading metadata or destruction of the reader itself.