Bug 1833654 - Switch android to f32. r=alwu,geckoview-reviewers,owlish

This removes the defines that control the preferred sample type and fixes all
of their uses.

Differential Revision: https://phabricator.services.mozilla.com/D181520
Paul Adenot 2023-08-08 12:12:44 +00:00
parent 3902121a68
commit 70a99eb632
17 changed files with 35 additions and 233 deletions

View File

@@ -210,13 +210,7 @@ class AudioConfig {
     FORMAT_S24,
     FORMAT_S32,
     FORMAT_FLT,
-#  if defined(MOZ_SAMPLE_TYPE_FLOAT32)
     FORMAT_DEFAULT = FORMAT_FLT
-#  elif defined(MOZ_SAMPLE_TYPE_S16)
-    FORMAT_DEFAULT = FORMAT_S16
-#  else
-#    error "Not supported audio type"
-#  endif
   };
   AudioConfig(const ChannelLayout& aChannelLayout, uint32_t aRate,

View File

@@ -25,12 +25,8 @@ enum AudioSampleFormat {
   AUDIO_FORMAT_S16,
   // Signed 32-bit float samples
   AUDIO_FORMAT_FLOAT32,
-  // The format used for output by AudioStream.
-#ifdef MOZ_SAMPLE_TYPE_S16
-  AUDIO_OUTPUT_FORMAT = AUDIO_FORMAT_S16
-#else
+  // The format used for output by AudioStream.
   AUDIO_OUTPUT_FORMAT = AUDIO_FORMAT_FLOAT32
-#endif
 };
 enum { MAX_AUDIO_SAMPLE_SIZE = sizeof(float) };
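
Note: with AUDIO_OUTPUT_FORMAT pinned to AUDIO_FORMAT_FLOAT32, int16 producers now convert at the boundary (the tree uses ConvertAudioSamples for this). A minimal standalone sketch of the scaling, for reference only:

    #include <cstdint>

    // Map int16 PCM onto [-1.0, 1.0): dividing by 32768 keeps -32768 at
    // exactly -1.0f, the usual convention for f32 PCM.
    static inline float Int16ToFloat(int16_t aSample) {
      return static_cast<float>(aSample) * (1.0f / 32768.0f);
    }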

View File

@@ -7,6 +7,7 @@
 #include "GraphDriver.h"
+#include "AudioNodeEngine.h"
 #include "cubeb/cubeb.h"
 #include "mozilla/dom/AudioContext.h"
 #include "mozilla/dom/AudioDeviceInfo.h"
 #include "mozilla/dom/BaseAudioContextBinding.h"
@@ -591,14 +592,7 @@ void AudioCallbackDriver::Init() {
              "This is blocking and should never run on the main thread.");
   output.rate = mSampleRate;
-#ifdef MOZ_SAMPLE_TYPE_S16
-  MOZ_ASSERT(AUDIO_OUTPUT_FORMAT == AUDIO_FORMAT_S16);
-  output.format = CUBEB_SAMPLE_S16NE;
-#else
-  MOZ_ASSERT(AUDIO_OUTPUT_FORMAT == AUDIO_FORMAT_FLOAT32);
-  output.format = CUBEB_SAMPLE_FLOAT32NE;
-#endif
+  output.format = CUBEB_SAMPLE_FLOAT32NE;
   if (!mOutputChannelCount) {
     LOG(LogLevel::Warning, ("Output number of channels is 0."));
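
Note: the driver now always requests float output from cubeb. A standalone sketch of the equivalent output-side setup (cubeb C API; the literal rate and channel count stand in for the driver's mSampleRate and mOutputChannelCount):

    #include "cubeb/cubeb.h"

    cubeb_stream_params MakeOutputParams() {
      cubeb_stream_params output;
      output.format = CUBEB_SAMPLE_FLOAT32NE;  // always f32 now, no #ifdef
      output.rate = 48000;
      output.channels = 2;
      output.layout = CUBEB_LAYOUT_UNDEFINED;
      output.prefs = CUBEB_STREAM_PREF_NONE;
      return output;
    }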

View File

@@ -12,7 +12,6 @@
 #include "MediaContainerType.h"
 #include "MediaResource.h"
 #include "TimeUnits.h"
-#include "VorbisUtils.h"
 #include "mozilla/Base64.h"
 #include "mozilla/dom/ContentChild.h"
 #include "mozilla/SchedulerGroup.h"

View File

@@ -1,27 +0,0 @@
-/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
-/* vim:set ts=2 sw=2 sts=2 et cindent: */
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-#ifndef VORBISUTILS_H_
-#define VORBISUTILS_H_
-
-#ifdef MOZ_SAMPLE_TYPE_S16
-#  include <ogg/os_types.h>
-typedef ogg_int32_t VorbisPCMValue;
-
-#  define MOZ_CLIP_TO_15(x) ((x) < -32768 ? -32768 : (x) <= 32767 ? (x) : 32767)
-// Convert the output of vorbis_synthesis_pcmout to a AudioDataValue
-#  define MOZ_CONVERT_VORBIS_SAMPLE(x) \
-    (static_cast<AudioDataValue>(MOZ_CLIP_TO_15((x) >> 9)))
-#else /* MOZ_SAMPLE_TYPE_FLOAT32 */
-typedef float VorbisPCMValue;
-
-#  define MOZ_CONVERT_VORBIS_SAMPLE(x) (x)
-#endif
-
-#endif /* VORBISUTILS_H_ */
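
Note: MOZ_CONVERT_VORBIS_SAMPLE only did real work on Tremor (fixed-point) builds, where it shifted the decoder's 32-bit fixed-point samples down by 9 bits and clamped them into the int16 range; on float builds it was a pass-through, which is why the whole header can be deleted. The fixed-point path, written out as a function:

    #include <cstdint>

    // Equivalent of the deleted fixed-point path:
    //   MOZ_CONVERT_VORBIS_SAMPLE(x) == MOZ_CLIP_TO_15((x) >> 9)
    int16_t ConvertTremorSample(int32_t aFixed) {
      int32_t v = aFixed >> 9;  // scale down by 2^9, as the macro did
      if (v < -32768) v = -32768;
      if (v > 32767) v = 32767;
      return static_cast<int16_t>(v);
    }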

View File

@@ -338,17 +338,10 @@ nsresult OpusTrackEncoder::Encode(AudioSegment* aSegment) {
     // really predict the output frame count at each call.
     resamplingDest.SetLength(outframes * mChannels);
-#if MOZ_SAMPLE_TYPE_S16
-    short* in = reinterpret_cast<short*>(pcm.Elements());
-    short* out = reinterpret_cast<short*>(resamplingDest.Elements());
-    speex_resampler_process_interleaved_int(mResampler, in, &inframes, out,
-                                            &outframes);
-#else
     float* in = reinterpret_cast<float*>(pcm.Elements());
     float* out = reinterpret_cast<float*>(resamplingDest.Elements());
     speex_resampler_process_interleaved_float(mResampler, in, &inframes, out,
                                               &outframes);
-#endif
     MOZ_ASSERT(pcm.Length() >= mResampledLeftover.Length());
     PodCopy(pcm.Elements(), mResampledLeftover.Elements(),
@@ -406,15 +399,9 @@ nsresult OpusTrackEncoder::Encode(AudioSegment* aSegment) {
   frameData->SetLength(MAX_DATA_BYTES);
   // result is returned as opus error code if it is negative.
   result = 0;
-#ifdef MOZ_SAMPLE_TYPE_S16
-  const opus_int16* pcmBuf = static_cast<opus_int16*>(pcm.Elements());
-  result = opus_encode(mEncoder, pcmBuf, NumOutputFramesPerPacket(),
-                       frameData->Elements(), MAX_DATA_BYTES);
-#else
   const float* pcmBuf = static_cast<float*>(pcm.Elements());
   result = opus_encode_float(mEncoder, pcmBuf, NumOutputFramesPerPacket(),
                              frameData->Elements(), MAX_DATA_BYTES);
-#endif
   frameData->SetLength(result >= 0 ? result : 0);
   if (result < 0) {
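
Note: opus_encode_float takes interleaved float samples in [-1, 1] and emits one packet; the opus_encode integer path is gone. Minimal standalone usage (a sketch: 48 kHz stereo, one 20 ms frame of silence, error handling reduced to the return value):

    #include <opus/opus.h>
    #include <vector>

    int EncodeOneFrame(unsigned char* aOut, opus_int32 aOutSize) {
      int err = 0;
      OpusEncoder* enc =
          opus_encoder_create(48000, 2, OPUS_APPLICATION_AUDIO, &err);
      if (err != OPUS_OK) {
        return err;
      }
      // 20 ms at 48 kHz: 960 frames per channel, interleaved stereo.
      std::vector<float> pcm(960 * 2, 0.0f);
      opus_int32 len = opus_encode_float(enc, pcm.data(), 960, aOut, aOutSize);
      opus_encoder_destroy(enc);
      return len;  // packet size in bytes, or a negative Opus error code
    }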

View File

@@ -207,7 +207,6 @@ EXPORTS += [
     "VideoLimits.h",
     "VideoSegment.h",
     "VideoUtils.h",
-    "VorbisUtils.h",
     "WavDumper.h",
     "XiphExtradata.h",
 ]

View File

@@ -12,7 +12,6 @@
 #include "TimeUnits.h"
 #include "VideoUtils.h"
 #include "VorbisDecoder.h"  // For VorbisLayout
-#include "VorbisUtils.h"
 #include "mozilla/EndianUtils.h"
 #include "mozilla/PodOperations.h"
 #include "mozilla/SyncRunnable.h"

View File

@@ -7,7 +7,6 @@
 #include "VorbisDecoder.h"
 #include "VideoUtils.h"
-#include "VorbisUtils.h"
 #include "XiphExtradata.h"
 #include "mozilla/Logging.h"
 #include "mozilla/PodOperations.h"
@@ -179,7 +178,7 @@ RefPtr<MediaDataDecoder::DecodePromise> VorbisDataDecoder::Decode(
     LOG(LogLevel::Warning, ("vorbis_synthesis_blockin returned an error"));
   }
-  VorbisPCMValue** pcm = 0;
+  float** pcm = nullptr;
   int32_t frames = vorbis_synthesis_pcmout(&mVorbisDsp, &pcm);
   if (frames == 0) {
     return DecodePromise::CreateAndResolve(DecodedData(), __func__);
@@ -196,9 +195,9 @@ RefPtr<MediaDataDecoder::DecodePromise> VorbisDataDecoder::Decode(
           MediaResult(NS_ERROR_OUT_OF_MEMORY, __func__), __func__);
     }
     for (uint32_t j = 0; j < channels; ++j) {
-      VorbisPCMValue* channel = pcm[j];
+      float* channel = pcm[j];
       for (uint32_t i = 0; i < uint32_t(frames); ++i) {
-        buffer[i * channels + j] = MOZ_CONVERT_VORBIS_SAMPLE(channel[i]);
+        buffer[i * channels + j] = channel[i];
       }
     }
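
Note: vorbis_synthesis_pcmout hands back planar floats (one array per channel), so with the conversion macro gone the loop above is a plain interleaving copy. The same transform in isolation (hypothetical helper, not tree code):

    #include <cstdint>

    // Planar (aPlanar[channel][frame]) to interleaved
    // (aOut[frame * channels + channel]).
    void Interleave(float** aPlanar, uint32_t aChannels, uint32_t aFrames,
                    float* aOut) {
      for (uint32_t c = 0; c < aChannels; ++c) {
        for (uint32_t f = 0; f < aFrames; ++f) {
          aOut[f * aChannels + c] = aPlanar[c][f];
        }
      }
    }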

View File

@@ -757,24 +757,29 @@ class RemoteAudioDecoder final : public RemoteDataDecoder {
     }
     if (size > 0) {
-#ifdef MOZ_SAMPLE_TYPE_S16
-      const int32_t numSamples = size / 2;
-#else
-#  error We only support 16-bit integer PCM
-#endif
-      AlignedAudioBuffer audio(numSamples);
+      const int32_t sampleSize = sizeof(int16_t);
+      const int32_t numSamples = size / sampleSize;
+      InflatableShortBuffer audio(numSamples);
       if (!audio) {
         Error(MediaResult(NS_ERROR_OUT_OF_MEMORY, __func__));
+        LOG("OOM while allocating temporary output buffer");
         return;
       }
       jni::ByteBuffer::LocalRef dest = jni::ByteBuffer::New(audio.get(), size);
       aBuffer->WriteToByteBuffer(dest, offset, size);
+      AlignedFloatBuffer converted = audio.Inflate();
+      TimeUnit pts = TimeUnit::FromMicroseconds(presentationTimeUs);
+      LOG("Decoded: %u frames of %s audio, pts: %s, %d channels, %" PRId32
+          " Hz",
+          numSamples / mOutputChannels,
+          sampleSize == sizeof(int16_t) ? "int16" : "f32", pts.ToString().get(),
+          mOutputChannels, mOutputSampleRate);
       RefPtr<AudioData> data =
-          new AudioData(0, TimeUnit::FromMicroseconds(presentationTimeUs),
-                        std::move(audio), mOutputChannels, mOutputSampleRate);
+          new AudioData(
+              0, pts, std::move(converted), mOutputChannels, mOutputSampleRate);
       UpdateOutputStatus(std::move(data));
     }
@@ -815,6 +820,8 @@ already_AddRefed<MediaDataDecoder> RemoteDataDecoder::CreateAudioDecoder(
       java::sdk::MediaFormat::CreateAudioFormat(config.mMimeType, config.mRate,
                                                 config.mChannels, &format),
       nullptr);
+  // format->SetInteger(java::sdk::MediaFormat::KEY_PCM_ENCODING,
+  // java::sdk::AudioFormat::ENCODING_PCM_FLOAT);
   RefPtr<MediaDataDecoder> decoder =
       new RemoteAudioDecoder(config, format, aDrmStubId);
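
Note: Android's MediaCodec still delivers 16-bit PCM here, so the decoder reads into a short buffer and "inflates" it to float afterwards. A sketch of the assumed widening step (the real InflatableShortBuffer/AlignedFloatBuffer types live elsewhere in the tree):

    #include <cstdint>
    #include <memory>

    std::unique_ptr<float[]> InflateToFloat(const int16_t* aPcm,
                                            size_t aSamples) {
      auto out = std::make_unique<float[]>(aSamples);
      for (size_t i = 0; i < aSamples; ++i) {
        out[i] = static_cast<float>(aPcm[i]) * (1.0f / 32768.0f);  // s16 -> f32
      }
      return out;
    }

The commented-out KEY_PCM_ENCODING lines record the eventual alternative: asking MediaCodec for float PCM directly instead of inflating after the fact.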

View File

@@ -545,15 +545,8 @@ MediaResult AppleATDecoder::SetupDecoder(MediaRawData* aSample) {
   mOutputFormat.mFormatID = kAudioFormatLinearPCM;
   mOutputFormat.mSampleRate = inputFormat.mSampleRate;
   mOutputFormat.mChannelsPerFrame = inputFormat.mChannelsPerFrame;
-#if defined(MOZ_SAMPLE_TYPE_FLOAT32)
   mOutputFormat.mBitsPerChannel = 32;
   mOutputFormat.mFormatFlags = kLinearPCMFormatFlagIsFloat | 0;
-#elif defined(MOZ_SAMPLE_TYPE_S16)
-  mOutputFormat.mBitsPerChannel = 16;
-  mOutputFormat.mFormatFlags = kLinearPCMFormatFlagIsSignedInteger | 0;
-#else
-#  error Unknown audio sample type
-#endif
   // Set up the decoder so it gives us one sample per frame
   mOutputFormat.mFramesPerPacket = 1;
   mOutputFormat.mBytesPerPacket = mOutputFormat.mBytesPerFrame =
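
Note: with only the float branch left, the remaining LPCM fields follow from 32-bit samples and the channel count. A sketch of a complete float AudioStreamBasicDescription (CoreAudio types; mirrors the kept branch, with the byte counts derived as just below; the tree writes the float flag as `kLinearPCMFormatFlagIsFloat | 0`):

    #include <CoreAudio/CoreAudioTypes.h>

    AudioStreamBasicDescription MakeFloatLPCM(Float64 aRate, UInt32 aChannels) {
      AudioStreamBasicDescription desc{};
      desc.mFormatID = kAudioFormatLinearPCM;
      desc.mFormatFlags = kLinearPCMFormatFlagIsFloat;
      desc.mSampleRate = aRate;
      desc.mChannelsPerFrame = aChannels;
      desc.mBitsPerChannel = 32;  // f32 samples
      desc.mFramesPerPacket = 1;  // one sample per frame, as above
      desc.mBytesPerFrame = aChannels * sizeof(float);
      desc.mBytesPerPacket = desc.mBytesPerFrame;
      return desc;
    }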

View File

@@ -109,21 +109,6 @@ void FFmpegAudioDecoder<LIBAV_VER>::InitCodecContext() {
   }
   mCodecContext->sample_rate = AssertedCast<int>(mAudioInfo.mRate);
 #endif
-#ifdef FFVPX_VERSION
-  // AudioInfo's layout first 32-bits are bit-per-bit compatible with
-  // WAVEFORMATEXTENSIBLE and FFmpeg's AVChannel enum. We can cast here.
-  mCodecContext->ch_layout.nb_channels =
-      AssertedCast<int>(mAudioInfo.mChannels);
-  if (mAudioInfo.mChannelMap != AudioConfig::ChannelLayout::UNKNOWN_MAP) {
-    mLib->av_channel_layout_from_mask(
-        &mCodecContext->ch_layout,
-        static_cast<uint64_t>(mAudioInfo.mChannelMap));
-  } else {
-    mLib->av_channel_layout_default(&mCodecContext->ch_layout,
-                                    AssertedCast<int>(mAudioInfo.mChannels));
-  }
-  mCodecContext->sample_rate = AssertedCast<int>(mAudioInfo.mRate);
-#endif
 }

 static AlignedAudioBuffer CopyAndPackAudio(AVFrame* aFrame,
@@ -134,61 +119,6 @@ static AlignedAudioBuffer CopyAndPackAudio(AVFrame* aFrame,
     return audio;
   }
-#ifdef MOZ_SAMPLE_TYPE_S16
-  if (aFrame->format == AV_SAMPLE_FMT_FLT) {
-    // Audio data already packed. Need to convert from 32 bits Float to S16
-    AudioDataValue* tmp = audio.get();
-    float* data = reinterpret_cast<float**>(aFrame->data)[0];
-    for (uint32_t frame = 0; frame < aNumAFrames; frame++) {
-      for (uint32_t channel = 0; channel < aNumChannels; channel++) {
-        *tmp++ = FloatToAudioSample<int16_t>(*data++);
-      }
-    }
-  } else if (aFrame->format == AV_SAMPLE_FMT_FLTP) {
-    // Planar audio data. Convert it from 32 bits float to S16
-    // and pack it into something we can understand.
-    AudioDataValue* tmp = audio.get();
-    float** data = reinterpret_cast<float**>(aFrame->data);
-    for (uint32_t frame = 0; frame < aNumAFrames; frame++) {
-      for (uint32_t channel = 0; channel < aNumChannels; channel++) {
-        *tmp++ = FloatToAudioSample<int16_t>(data[channel][frame]);
-      }
-    }
-  } else if (aFrame->format == AV_SAMPLE_FMT_S16) {
-    // Audio data already packed. No need to do anything other than copy it
-    // into a buffer we own.
-    memcpy(audio.get(), aFrame->data[0],
-           aNumChannels * aNumAFrames * sizeof(AudioDataValue));
-  } else if (aFrame->format == AV_SAMPLE_FMT_S16P) {
-    // Planar audio data. Pack it into something we can understand.
-    AudioDataValue* tmp = audio.get();
-    AudioDataValue** data = reinterpret_cast<AudioDataValue**>(aFrame->data);
-    for (uint32_t frame = 0; frame < aNumAFrames; frame++) {
-      for (uint32_t channel = 0; channel < aNumChannels; channel++) {
-        *tmp++ = data[channel][frame];
-      }
-    }
-  } else if (aFrame->format == AV_SAMPLE_FMT_S32) {
-    // Audio data already packed. Need to convert from S32 to S16
-    AudioDataValue* tmp = audio.get();
-    int32_t* data = reinterpret_cast<int32_t**>(aFrame->data)[0];
-    for (uint32_t frame = 0; frame < aNumAFrames; frame++) {
-      for (uint32_t channel = 0; channel < aNumChannels; channel++) {
-        *tmp++ = *data++ / (1U << 16);
-      }
-    }
-  } else if (aFrame->format == AV_SAMPLE_FMT_S32P) {
-    // Planar audio data. Convert it from S32 to S16
-    // and pack it into something we can understand.
-    AudioDataValue* tmp = audio.get();
-    int32_t** data = reinterpret_cast<int32_t**>(aFrame->data);
-    for (uint32_t frame = 0; frame < aNumAFrames; frame++) {
-      for (uint32_t channel = 0; channel < aNumChannels; channel++) {
-        *tmp++ = data[channel][frame] / (1U << 16);
-      }
-    }
-  }
-#else
   if (aFrame->format == AV_SAMPLE_FMT_FLT) {
     // Audio data already packed. No need to do anything other than copy it
     // into a buffer we own.
@@ -242,7 +172,6 @@ static AlignedAudioBuffer CopyAndPackAudio(AVFrame* aFrame,
       }
     }
   }
-#endif
   return audio;
 }
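
Note: every FloatToAudioSample/S16 path above disappears; float frames are now copied (packed) or interleaved (planar) unchanged. The surviving planar-float case in isolation (FFmpeg AVFrame layout; hypothetical helper):

    extern "C" {
    #include <libavutil/frame.h>
    }

    // AV_SAMPLE_FMT_FLTP: aFrame->data[ch] is a separate float plane per
    // channel. Interleave straight into the output buffer.
    void PackPlanarFloat(AVFrame* aFrame, uint32_t aNumChannels,
                         uint32_t aNumAFrames, float* aOut) {
      float** data = reinterpret_cast<float**>(aFrame->data);
      for (uint32_t frame = 0; frame < aNumAFrames; frame++) {
        for (uint32_t channel = 0; channel < aNumChannels; channel++) {
          *aOut++ = data[channel][frame];
        }
      }
    }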

View File

@@ -38,20 +38,8 @@ int WebAudioUtils::SpeexResamplerProcess(SpeexResamplerState* aResampler,
                                          uint32_t aChannel, const float* aIn,
                                          uint32_t* aInLen, float* aOut,
                                          uint32_t* aOutLen) {
-#ifdef MOZ_SAMPLE_TYPE_S16
-  AutoTArray<AudioDataValue, WEBAUDIO_BLOCK_SIZE * 4> tmp1;
-  AutoTArray<AudioDataValue, WEBAUDIO_BLOCK_SIZE * 4> tmp2;
-  tmp1.SetLength(*aInLen);
-  tmp2.SetLength(*aOutLen);
-  ConvertAudioSamples(aIn, tmp1.Elements(), *aInLen);
-  int result = speex_resampler_process_int(
-      aResampler, aChannel, tmp1.Elements(), aInLen, tmp2.Elements(), aOutLen);
-  ConvertAudioSamples(tmp2.Elements(), aOut, *aOutLen);
-  return result;
-#else
   return speex_resampler_process_float(aResampler, aChannel, aIn, aInLen, aOut,
                                        aOutLen);
-#endif
 }

 int WebAudioUtils::SpeexResamplerProcess(SpeexResamplerState* aResampler,
@@ -59,29 +47,17 @@ int WebAudioUtils::SpeexResamplerProcess(SpeexResamplerState* aResampler,
                                          uint32_t aChannel, const int16_t* aIn,
                                          uint32_t* aInLen, float* aOut,
                                          uint32_t* aOutLen) {
   AutoTArray<AudioDataValue, WEBAUDIO_BLOCK_SIZE * 4> tmp;
-#ifdef MOZ_SAMPLE_TYPE_S16
-  tmp.SetLength(*aOutLen);
-  int result = speex_resampler_process_int(aResampler, aChannel, aIn, aInLen,
-                                           tmp.Elements(), aOutLen);
-  ConvertAudioSamples(tmp.Elements(), aOut, *aOutLen);
-  return result;
-#else
   tmp.SetLength(*aInLen);
   ConvertAudioSamples(aIn, tmp.Elements(), *aInLen);
   int result = speex_resampler_process_float(
       aResampler, aChannel, tmp.Elements(), aInLen, aOut, aOutLen);
   return result;
-#endif
 }

 int WebAudioUtils::SpeexResamplerProcess(SpeexResamplerState* aResampler,
                                          uint32_t aChannel, const int16_t* aIn,
                                          uint32_t* aInLen, int16_t* aOut,
                                          uint32_t* aOutLen) {
-#ifdef MOZ_SAMPLE_TYPE_S16
-  return speex_resampler_process_int(aResampler, aChannel, aIn, aInLen, aOut,
-                                     aOutLen);
-#else
   AutoTArray<AudioDataValue, WEBAUDIO_BLOCK_SIZE * 4> tmp1;
   AutoTArray<AudioDataValue, WEBAUDIO_BLOCK_SIZE * 4> tmp2;
   tmp1.SetLength(*aInLen);
@@ -91,7 +67,6 @@ int WebAudioUtils::SpeexResamplerProcess(SpeexResamplerState* aResampler,
       aResampler, aChannel, tmp1.Elements(), aInLen, tmp2.Elements(), aOutLen);
   ConvertAudioSamples(tmp2.Elements(), aOut, *aOutLen);
   return result;
-#endif
 }

 void WebAudioUtils::LogToDeveloperConsole(uint64_t aWindowID,
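
Note: all three overloads now funnel into speex_resampler_process_float, with ConvertAudioSamples handling int16 data at the edges. Minimal standalone use of the float entry point:

    #include <speex/speex_resampler.h>

    // Resample one channel of float PCM from 44.1 kHz to 48 kHz.
    int ResampleOnce(const float* aIn, spx_uint32_t aInLen, float* aOut,
                     spx_uint32_t aOutLen) {
      int err = 0;
      SpeexResamplerState* st = speex_resampler_init(
          1, 44100, 48000, SPEEX_RESAMPLER_QUALITY_DEFAULT, &err);
      if (!st) {
        return err;
      }
      err = speex_resampler_process_float(st, 0, aIn, &aInLen, aOut, &aOutLen);
      speex_resampler_destroy(st);
      return err;  // RESAMPLER_ERR_SUCCESS (0) on success
    }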

View File

@@ -109,20 +109,9 @@ nsReturnRef<HRTFKernel> HRTFElevation::calculateKernelForAzimuthElevation(
   const int16_t(&impulse_response_data)[ResponseFrameSize] =
       irc_composite_c_r0195[elevationIndex].azimuths[azimuthIndex];

-  // When libspeex_resampler is compiled with FIXED_POINT, samples in
-  // speex_resampler_process_float are rounded directly to int16_t, which
-  // only works well if the floats are in the range +/-32767. On such
-  // platforms it's better to resample before converting to float anyway.
-#ifdef MOZ_SAMPLE_TYPE_S16
-#  define RESAMPLER_PROCESS speex_resampler_process_int
-  const int16_t* response = impulse_response_data;
-  const int16_t* resampledResponse;
-#else
-#  define RESAMPLER_PROCESS speex_resampler_process_float
   float response[ResponseFrameSize];
   ConvertAudioSamples(impulse_response_data, response, ResponseFrameSize);
   float* resampledResponse;
-#endif

   // Note that depending on the fftSize returned by the panner, we may be
   // truncating the impulse response.
@@ -140,8 +129,8 @@ nsReturnRef<HRTFKernel> HRTFElevation::calculateKernelForAzimuthElevation(
     // Feed the input buffer into the resampler.
     spx_uint32_t in_len = ResponseFrameSize;
     spx_uint32_t out_len = resampled.Length();
-    RESAMPLER_PROCESS(resampler, 0, response, &in_len, resampled.Elements(),
-                      &out_len);
+    speex_resampler_process_float(resampler, 0, response, &in_len,
+                                  resampled.Elements(), &out_len);

     if (out_len < resampled.Length()) {
       // The input should have all been processed.
@@ -150,8 +139,8 @@ nsReturnRef<HRTFKernel> HRTFElevation::calculateKernelForAzimuthElevation(
       spx_uint32_t out_index = out_len;
       in_len = speex_resampler_get_input_latency(resampler);
       out_len = resampled.Length() - out_index;
-      RESAMPLER_PROCESS(resampler, 0, nullptr, &in_len,
-                        resampled.Elements() + out_index, &out_len);
+      speex_resampler_process_float(resampler, 0, nullptr, &in_len,
+                                    resampled.Elements() + out_index, &out_len);
       out_index += out_len;

       // There may be some uninitialized samples remaining for very low
       // sample rates.
@@ -161,18 +150,8 @@ nsReturnRef<HRTFKernel> HRTFElevation::calculateKernelForAzimuthElevation(
     speex_resampler_reset_mem(resampler);
   }

-#ifdef MOZ_SAMPLE_TYPE_S16
-  AutoTArray<float, 2 * ResponseFrameSize> floatArray;
-  floatArray.SetLength(resampledResponseLength);
-  float* floatResponse = floatArray.Elements();
-  ConvertAudioSamples(resampledResponse, floatResponse,
-                      resampledResponseLength);
-#else
-  float* floatResponse = resampledResponse;
-#endif
-#undef RESAMPLER_PROCESS
-
-  return HRTFKernel::create(floatResponse, resampledResponseLength, sampleRate);
+  return HRTFKernel::create(resampledResponse, resampledResponseLength,
+                            sampleRate);
 }

 // The range of elevations for the IRCAM impulse responses varies depending on

View File

@@ -24,14 +24,10 @@ FINAL_LIBRARY = 'gkmedias'
 DEFINES['OUTSIDE_SPEEX'] = True
 DEFINES['EXPORT'] = ''

-if CONFIG['MOZ_SAMPLE_TYPE_S16']:
-    DEFINES['FIXED_POINT'] = True
-else:
-    DEFINES['FLOATING_POINT'] = True
+DEFINES['FLOATING_POINT'] = True

 # Only use SSE code when using floating point samples, and on x86
-if CONFIG['INTEL_ARCHITECTURE'] and not CONFIG['MOZ_SAMPLE_TYPE_S16']:
+if CONFIG['INTEL_ARCHITECTURE']:
     DEFINES['_USE_SSE'] = True
     DEFINES['_USE_SSE2'] = True

 SOURCES += [

View File

@@ -799,30 +799,11 @@ set_config("MOZ_FMP4", fmp4)
 set_define("MOZ_FMP4", fmp4)

-@depends(target)
-def sample_type_is_s16(target):
-    # Use integers over floats for audio on Android regardless of the CPU
-    # architecture, because audio backends for Android don't support floats.
-    # We also use integers on ARM because it's more efficient.
-    if target.os == "Android" or target.cpu == "arm":
-        return True
+set_config("MOZ_SAMPLE_TYPE_FLOAT32", True)
+set_define("MOZ_SAMPLE_TYPE_FLOAT32", True)

-@depends(sample_type_is_s16)
-def sample_type_is_float(t):
-    if not t:
-        return True
-
-set_config("MOZ_SAMPLE_TYPE_S16", sample_type_is_s16)
-set_define("MOZ_SAMPLE_TYPE_S16", sample_type_is_s16)
-set_config("MOZ_SAMPLE_TYPE_FLOAT32", sample_type_is_float)
-set_define("MOZ_SAMPLE_TYPE_FLOAT32", sample_type_is_float)
-set_define("MOZ_VORBIS", sample_type_is_float)
-set_config("MOZ_VORBIS", sample_type_is_float)
-set_define("MOZ_TREMOR", sample_type_is_s16)
-set_config("MOZ_TREMOR", sample_type_is_s16)
+set_define("MOZ_VORBIS", True)
+set_config("MOZ_VORBIS", True)

 option(
     "--disable-real-time-tracing",

View File

@@ -9,5 +9,7 @@
 # We only use constants from KeyStatus
 [android.media.MediaDrm$KeyStatus = skip:true]
     <field> = skip:false
+[android.media.AudioFormat = skip:true]
+    <field> = skip:false
 [android.media.MediaFormat = exceptionMode:nsresult]