diff --git a/content/media/plugins/MediaPluginReader.cpp b/content/media/plugins/MediaPluginReader.cpp index 88b0ce81adb4..0c7ffb486574 100644 --- a/content/media/plugins/MediaPluginReader.cpp +++ b/content/media/plugins/MediaPluginReader.cpp @@ -324,7 +324,21 @@ nsresult MediaPluginReader::Seek(int64_t aTarget, int64_t aStartTime, int64_t aE mVideoQueue.Reset(); mAudioQueue.Reset(); - mAudioSeekTimeUs = mVideoSeekTimeUs = aTarget; + if (mHasAudio && mHasVideo) { + // The decoder seeks/demuxes audio and video streams separately. So if + // we seek both audio and video to aTarget, the audio stream can typically + // seek closer to the seek target, since typically every audio block is + // a sync point, whereas for video there are only keyframes once every few + // seconds. So if we have both audio and video, we must seek the video + // stream to the preceding keyframe first, get the stream time, and then + // seek the audio stream to match the video stream's time. Otherwise, the + // audio and video streams won't be in sync after the seek. + mVideoSeekTimeUs = aTarget; + const VideoData* v = DecodeToFirstVideoData(); + mAudioSeekTimeUs = v ? v->mTime : aTarget; + } else { + mAudioSeekTimeUs = mVideoSeekTimeUs = aTarget; + } return NS_OK; }