Merge pull request #7315 from hrydgard/audio-resample

Use Dolphin's audio resampler to lower latency on Android and prevent drift
This commit is contained in:
Henrik Rydgård 2015-01-11 21:10:08 +01:00
commit 82ccce9eca
20 changed files with 553 additions and 399 deletions

View File

@ -1276,6 +1276,8 @@ add_library(${CoreLibName} ${CoreLinkType}
Core/HW/MemoryStick.h
Core/HW/SasAudio.cpp
Core/HW/SasAudio.h
Core/HW/StereoResampler.cpp
Core/HW/StereoResampler.h
Core/Host.cpp
Core/Host.h
Core/Loaders.cpp

View File

@ -1,29 +1,12 @@
// Copyright (C) 2003 Dolphin Project.
// Copyright 2013 Dolphin Emulator Project
// Licensed under GPLv2
// Refer to the license.txt file included.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
// IWYU pragma: private, include "Common/Atomic.h"
#pragma once
#ifdef BLACKBERRY
#include <atomic.h>
#elif defined(__SYMBIAN32__)
#include <e32atomics.h>
#endif
#include "Common.h"
#include "CommonTypes.h"
// Atomic operations are performed in a single step by the CPU. It is
// impossible for other threads to see the operation "half-done."
@ -42,53 +25,78 @@
namespace Common
{
inline void AtomicAdd(volatile u32& target, u32 value) {
inline void AtomicAdd(volatile u32& target, u32 value)
{
__sync_add_and_fetch(&target, value);
}
inline void AtomicAnd(volatile u32& target, u32 value) {
inline void AtomicAnd(volatile u32& target, u32 value)
{
__sync_and_and_fetch(&target, value);
}
inline void AtomicDecrement(volatile u32& target) {
inline void AtomicDecrement(volatile u32& target)
{
__sync_add_and_fetch(&target, -1);
}
inline void AtomicIncrement(volatile u32& target) {
inline void AtomicIncrement(volatile u32& target)
{
__sync_add_and_fetch(&target, 1);
}
inline u32 AtomicLoad(volatile u32& src) {
return src; // 32-bit reads are always atomic.
}
inline u32 AtomicLoadAcquire(volatile u32& src) {
#ifdef __SYMBIAN32__
return __e32_atomic_load_acq32(&src);
#else
//keep the compiler from caching any memory references
u32 result = src; // 32-bit reads are always atomic.
//__sync_synchronize(); // TODO: May not be necessary.
// Compiler instruction only. x86 loads always have acquire semantics.
__asm__ __volatile__ ( "":::"memory" );
return result;
#endif
}
inline void AtomicOr(volatile u32& target, u32 value) {
inline void AtomicOr(volatile u32& target, u32 value)
{
__sync_or_and_fetch(&target, value);
}
inline void AtomicStore(volatile u32& dest, u32 value) {
dest = value; // 32-bit writes are always atomic.
// Support clang versions older than 3.4.
#if __clang__
#if !__has_feature(cxx_atomic)
template <typename T>
_Atomic(T)* ToC11Atomic(volatile T* loc)
{
return (_Atomic(T)*) loc;
}
inline void AtomicStoreRelease(volatile u32& dest, u32 value) {
#ifdef BLACKBERRY
atomic_set(&dest, value);
#elif defined(__SYMBIAN32__)
__e32_atomic_store_rel32(&dest, value);
#else
__sync_lock_test_and_set(&dest, value); // TODO: Wrong! This function has acquire semantics.
#define __atomic_load_n(p, m) __c11_atomic_load(ToC11Atomic(p), m)
#define __atomic_store_n(p, v, m) __c11_atomic_store(ToC11Atomic(p), v, m)
#define __atomic_exchange_n(p, v, m) __c11_atomic_exchange(ToC11Atomic(p), v, m)
#endif
#endif
#ifndef __ATOMIC_RELAXED
#error __ATOMIC_RELAXED not defined; your compiler version is too old.
#endif
// Atomically reads src using __atomic_load_n with __ATOMIC_RELAXED ordering:
// the load itself is atomic, but no ordering relative to other memory
// operations is requested.
template <typename T>
inline T AtomicLoad(volatile T& src)
{
return __atomic_load_n(&src, __ATOMIC_RELAXED);
}
// Atomically reads src using __atomic_load_n with __ATOMIC_ACQUIRE ordering.
// Intended to pair with AtomicStoreRelease on the writing side.
template <typename T>
inline T AtomicLoadAcquire(volatile T& src)
{
return __atomic_load_n(&src, __ATOMIC_ACQUIRE);
}
// Atomically writes value to dest using __atomic_store_n with __ATOMIC_RELAXED
// ordering. value may be of a different type U; it is handed to the builtin as-is.
template <typename T, typename U>
inline void AtomicStore(volatile T& dest, U value)
{
__atomic_store_n(&dest, value, __ATOMIC_RELAXED);
}
// Atomically writes value to dest using __atomic_store_n with __ATOMIC_RELEASE
// ordering. Intended to pair with AtomicLoadAcquire on the reading side.
template <typename T, typename U>
inline void AtomicStoreRelease(volatile T& dest, U value)
{
__atomic_store_n(&dest, value, __ATOMIC_RELEASE);
}
// Atomically replaces the pointer stored in loc with newval and returns the
// result of __atomic_exchange_n.
// NOTE(review): despite the "Acquire" in the name, the exchange is issued with
// __ATOMIC_ACQ_REL, i.e. it requests both acquire and release ordering.
template <typename T, typename U>
inline T* AtomicExchangeAcquire(T* volatile& loc, U newval)
{
return __atomic_exchange_n(&loc, newval, __ATOMIC_ACQ_REL);
}
}

View File

@ -1,29 +1,15 @@
// Copyright (C) 2003 Dolphin Project.
// Copyright 2013 Dolphin Emulator Project
// Licensed under GPLv2
// Refer to the license.txt file included.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
// IWYU pragma: private, include "Common/Atomic.h"
#pragma once
#include "Common.h"
#ifndef _XBOX
#include <intrin.h>
#else
#include <ppcintrinsics.h>
#endif
#include "CommonWindows.h"
#include <Windows.h>
#include "CommonTypes.h"
// Atomic operations are performed in a single step by the CPU. It is
// impossible for other threads to see the operation "half-done."
@ -46,41 +32,66 @@
namespace Common
{
inline void AtomicAdd(volatile u32& target, u32 value) {
InterlockedExchangeAdd((volatile LONG*)&target, (LONG)value);
inline void AtomicAdd(volatile u32& target, u32 value)
{
_InterlockedExchangeAdd((volatile LONG*)&target, (LONG)value);
}
inline void AtomicAnd(volatile u32& target, u32 value) {
inline void AtomicAnd(volatile u32& target, u32 value)
{
_InterlockedAnd((volatile LONG*)&target, (LONG)value);
}
inline void AtomicIncrement(volatile u32& target) {
InterlockedIncrement((volatile LONG*)&target);
inline void AtomicIncrement(volatile u32& target)
{
_InterlockedIncrement((volatile LONG*)&target);
}
inline void AtomicDecrement(volatile u32& target) {
InterlockedDecrement((volatile LONG*)&target);
inline void AtomicDecrement(volatile u32& target)
{
_InterlockedDecrement((volatile LONG*)&target);
}
inline u32 AtomicLoad(volatile u32& src) {
// Atomically ORs value into target via the _InterlockedOr intrinsic.
// target and value are cast to LONG for the intrinsic; its return value is ignored.
inline void AtomicOr(volatile u32& target, u32 value)
{
_InterlockedOr((volatile LONG*)&target, (LONG)value);
}
// For the comment below to hold, better only use this with 32-bit types..
// Reads src with a plain volatile load; no barrier or interlocked intrinsic is used.
// Atomicity therefore relies on the hardware guarantee mentioned in the inline comment.
template <typename T>
inline T AtomicLoad(volatile T& src)
{
return src; // 32-bit reads are always atomic.
}
inline u32 AtomicLoadAcquire(volatile u32& src) {
u32 result = src; // 32-bit reads are always atomic.
// For the comment below to hold, better only use this with 32-bit types..
// Reads src with a plain volatile load, then issues _ReadBarrier() before
// returning the value.
// NOTE(review): the inline comment describes _ReadBarrier as a compiler-only
// barrier and relies on x86 load semantics; confirm before using on other architectures.
template <typename T>
inline T AtomicLoadAcquire(volatile T& src)
{
T result = src; // 32-bit reads are always atomic.
_ReadBarrier(); // Compiler instruction only. x86 loads always have acquire semantics.
return result;
}
inline void AtomicOr(volatile u32& target, u32 value) {
_InterlockedOr((volatile LONG*)&target, (LONG)value);
// For the comment below to hold, better only use this with 32-bit types..
// Writes value to dest with a plain volatile store, after casting it to T.
// No barrier or interlocked intrinsic is used.
template <typename T, typename U>
inline void AtomicStore(volatile T& dest, U value)
{
dest = (T)value; // 32-bit writes are always atomic.
}
inline void AtomicStore(volatile u32& dest, u32 value) {
dest = value; // 32-bit writes are always atomic.
}
inline void AtomicStoreRelease(volatile u32& dest, u32 value) {
// For the comment below to hold, better only use this with 32-bit types..
template <typename T, typename U>
inline void AtomicStoreRelease(volatile T& dest, U value)
{
_WriteBarrier(); // Compiler instruction only. x86 stores always have release semantics.
dest = value; // 32-bit writes are always atomic.
dest = (T)value; // 32-bit writes are always atomic
}
// Swaps the pointer stored in loc with newval through the
// _InterlockedExchangePointer_acq intrinsic and returns the intrinsic's result
// cast back to T* (presumably the previous pointer value - see the MSVC intrinsic docs).
// Both the location and the new value are cast to void pointers for the call.
template <typename T, typename U>
inline T* AtomicExchangeAcquire(T* volatile& loc, U newval)
{
return (T*)_InterlockedExchangePointer_acq((void* volatile*)&loc, (void*)newval);
}
}

View File

@ -334,8 +334,6 @@ static bool DefaultForceFlushToZero() {
static ConfigSetting cpuSettings[] = {
ReportedConfigSetting("Jit", &g_Config.bJit, &DefaultJit, true, true),
ReportedConfigSetting("SeparateCPUThread", &g_Config.bSeparateCPUThread, false, true, true),
ConfigSetting("AtomicAudioLocks", &g_Config.bAtomicAudioLocks, false, true, true),
ReportedConfigSetting("SeparateIOThread", &g_Config.bSeparateIOThread, true, true, true),
ReportedConfigSetting("IOTimingMethod", &g_Config.iIOTimingMethod, IOTIMING_FAST, true, true),
ConfigSetting("FastMemoryAccess", &g_Config.bFastMemory, true, true, true),

View File

@ -118,7 +118,6 @@ public:
bool bSeparateCPUThread;
int iIOTimingMethod;
bool bSeparateIOThread;
bool bAtomicAudioLocks;
int iLockedCPUSpeed;
bool bAutoSaveSymbolMap;
bool bCacheFullIsoInRam;

View File

@ -274,6 +274,7 @@
<ClCompile Include="HW\SasAudio.cpp" />
<ClCompile Include="HW\AsyncIOManager.cpp" />
<ClCompile Include="HW\SimpleAudioDec.cpp" />
<ClCompile Include="HW\StereoResampler.cpp" />
<ClCompile Include="Loaders.cpp" />
<ClCompile Include="MemMap.cpp" />
<ClCompile Include="MemmapFunctions.cpp" />
@ -512,6 +513,7 @@
<ClInclude Include="HW\MemoryStick.h" />
<ClInclude Include="HW\AsyncIOManager.h" />
<ClInclude Include="HW\SimpleAudioDec.h" />
<ClInclude Include="HW\StereoResampler.h" />
<ClInclude Include="Loaders.h" />
<ClInclude Include="MemMap.h" />
<ClInclude Include="MIPS\ARM\ArmAsm.h">

View File

@ -300,6 +300,9 @@
<ClCompile Include="HW\MediaEngine.cpp">
<Filter>HW</Filter>
</ClCompile>
<ClCompile Include="HW\StereoResampler.cpp">
<Filter>HW</Filter>
</ClCompile>
<ClCompile Include="Util\PPGeDraw.cpp">
<Filter>Util</Filter>
</ClCompile>
@ -775,6 +778,9 @@
<ClInclude Include="HW\MediaEngine.h">
<Filter>HW</Filter>
</ClInclude>
<ClInclude Include="HW\StereoResampler.h">
<Filter>HW</Filter>
</ClInclude>
<ClInclude Include="Util\PPGeDraw.h">
<Filter>Util</Filter>
</ClInclude>

View File

@ -35,7 +35,9 @@
#include "Core/HLE/sceAudio.h"
#include "Core/HLE/sceKernel.h"
#include "Core/HLE/sceKernelThread.h"
#include "Core/HW/StereoResampler.h"
StereoResampler resampler;
// Should be used to lock anything related to the outAudioQueue.
// atomic locks are used on the lock. TODO: make this lock-free
@ -67,14 +69,6 @@ static s32 *mixBuffer;
static int chanQueueMaxSizeFactor;
static int chanQueueMinSizeFactor;
// TODO: Need to replace this with something lockless. Mutexes in the audio pipeline
// is bad mojo.
FixedSizeQueue<s16, 512 * 16> outAudioQueue;
bool __gainAudioQueueLock();
void __releaseAcquiredLock();
void __blockForAudioQueueLock();
static inline s16 adjustvolume(s16 sample, int vol) {
#ifdef ARM
register int r;
@ -181,9 +175,7 @@ void __AudioInit() {
mixBuffer = new s32[hwBlockSize * 2];
memset(mixBuffer, 0, hwBlockSize * 2 * sizeof(s32));
__blockForAudioQueueLock();
outAudioQueue.clear();
__releaseAcquiredLock();
resampler.Clear();
CoreTiming::RegisterMHzChangeCallback(&__AudioCPUMHzChange);
}
@ -199,16 +191,14 @@ void __AudioDoState(PointerWrap &p) {
p.Do(mixFrequency);
{
//block until a lock is achieved. Not a good idea at all, but
//can't think of a better one...
__blockForAudioQueueLock();
if (s >= 2) {
resampler.DoState(p);
} else {
// Only to preserve the previous file format. Might cause a slight audio glitch on upgrades?
FixedSizeQueue<s16, 512 * 16> outAudioQueue;
outAudioQueue.DoState(p);
//release the atomic lock
__releaseAcquiredLock();
resampler.Clear();
}
int chanCount = ARRAY_SIZE(chans);
@ -358,28 +348,6 @@ void __AudioSetOutputFrequency(int freq) {
mixFrequency = freq;
}
inline void ClampBufferToS16(s16 *out, s32 *in, size_t size) {
#ifdef _M_SSE
// Size will always be 16-byte aligned as the hwBlockSize is.
while (size >= 8) {
__m128i in1 = _mm_loadu_si128((__m128i *)in);
__m128i in2 = _mm_loadu_si128((__m128i *)(in + 4));
__m128i packed = _mm_packs_epi32(in1, in2);
_mm_storeu_si128((__m128i *)out, packed);
out += 8;
in += 8;
size -= 8;
}
for (size_t i = 0; i < size; i++) {
out[i] = clamp_s16(in[i]);
}
#else
for (size_t i = 0; i < size; i++) {
out[i] = clamp_s16(in[i]);
}
#endif
}
// Mix samples from the various audio channels into a single sample queue.
// This single sample queue is where __AudioMix should read from. If the sample queue is full, we should
// just sleep the main emulator thread a little.
@ -433,103 +401,13 @@ void __AudioUpdate() {
}
if (g_Config.bEnableSound) {
__blockForAudioQueueLock();
/*
if (!__gainAudioQueueLock()){
return;
}
*/
if (outAudioQueue.room() >= hwBlockSize * 2) {
s16 *buf1 = 0, *buf2 = 0;
size_t sz1, sz2;
outAudioQueue.pushPointers(hwBlockSize * 2, &buf1, &sz1, &buf2, &sz2);
ClampBufferToS16(buf1, mixBuffer, sz1);
if (buf2) {
ClampBufferToS16(buf2, mixBuffer + sz1, sz2);
}
} else {
// This happens quite a lot. There's still something slightly off
// about the amount of audio we produce.
}
//release the atomic lock
__releaseAcquiredLock();
resampler.PushSamples(mixBuffer, hwBlockSize);
}
}
// numFrames is number of stereo frames.
// This is called from *outside* the emulator thread.
int __AudioMix(short *outstereo, int numFrames)
{
// TODO: if mixFrequency != the actual output frequency, resample!
int underrun = -1;
s16 sampleL = 0;
s16 sampleR = 0;
const s16 *buf1 = 0, *buf2 = 0;
size_t sz1, sz2;
{
//TODO: do rigorous testing to see whether just blind locking will improve speed.
if (!__gainAudioQueueLock()){
memset(outstereo, 0, numFrames * 2 * sizeof(short));
return 0;
}
outAudioQueue.popPointers(numFrames * 2, &buf1, &sz1, &buf2, &sz2);
memcpy(outstereo, buf1, sz1 * sizeof(s16));
if (buf2) {
memcpy(outstereo + sz1, buf2, sz2 * sizeof(s16));
}
//release the atomic lock
__releaseAcquiredLock();
}
int remains = (int)(numFrames * 2 - sz1 - sz2);
if (remains > 0)
memset(outstereo + numFrames * 2 - remains, 0, remains*sizeof(s16));
if (sz1 + sz2 < (size_t)numFrames) {
underrun = (int)(sz1 + sz2) / 2;
VERBOSE_LOG(SCEAUDIO, "Audio out buffer UNDERRUN at %i of %i", underrun, numFrames);
}
return underrun >= 0 ? underrun : numFrames;
}
/*returns whether the lock was successfully gained or not.
i.e - whether the lock belongs to you
*/
inline bool __gainAudioQueueLock(){
if (g_Config.bAtomicAudioLocks){
/*if the previous state was 0, that means the lock was "unlocked". So,
we return !0, which is true thanks to C's int to bool conversion
One the other hand, if it was locked, then the lock would return 1.
so, !1 = 0 = false.
*/
return atomicLock_.test_and_set() == 0;
} else {
mutex_.lock();
return true;
}
};
inline void __releaseAcquiredLock(){
if (g_Config.bAtomicAudioLocks){
atomicLock_.clear();
} else {
mutex_.unlock();
}
}
inline void __blockForAudioQueueLock(){
if (g_Config.bAtomicAudioLocks){
while ((atomicLock_.test_and_set() == 0)){ }
} else {
mutex_.lock();
}
int __AudioMix(short *outstereo, int numFrames, int sampleRate) {
resampler.Mix(outstereo, numFrames, false, sampleRate);
return numFrames;
}

View File

@ -32,4 +32,4 @@ u32 __AudioEnqueue(AudioChannel &chan, int chanNum, bool blocking);
void __AudioWakeThreads(AudioChannel &chan, int result, int step);
void __AudioWakeThreads(AudioChannel &chan, int result);
int __AudioMix(short *outstereo, int numSamples);
int __AudioMix(short *outstereo, int numSamples, int sampleRate);

172
Core/HW/StereoResampler.cpp Normal file
View File

@ -0,0 +1,172 @@
// Copyright (c) 2015- PPSSPP Project and Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
// Adapted from Dolphin.
#include <string.h>
#include "base/logging.h"
#include "Common/ChunkFile.h"
#include "Common/MathUtil.h"
#include "Common/Atomics.h"
#include "Core/HW/StereoResampler.h"
#include "Globals.h"
#ifdef _M_SSE
#include <emmintrin.h>
#endif
// Narrows `size` 32-bit samples from `in` into 16-bit samples in `out`.
// With _M_SSE defined, groups of eight samples go through _mm_packs_epi32
// (signed saturating pack); whatever is left over, or everything when
// _M_SSE is not defined, is converted one at a time with clamp_s16.
inline void ClampBufferToS16(s16 *out, const s32 *in, size_t size) {
	size_t pos = 0;
#ifdef _M_SSE
	// Unaligned loads/stores are used, so no alignment is required of either
	// buffer. Callers are expected to pass block-sized chunks, which keeps
	// the scalar tail below short.
	for (; size - pos >= 8; pos += 8) {
		const __m128i lower = _mm_loadu_si128((const __m128i *)(in + pos));
		const __m128i upper = _mm_loadu_si128((const __m128i *)(in + pos + 4));
		_mm_storeu_si128((__m128i *)(out + pos), _mm_packs_epi32(lower, upper));
	}
#endif
	// Scalar path: the remainder after the SSE loop, or the whole buffer.
	for (; pos < size; pos++) {
		out[pos] = clamp_s16(in[pos]);
	}
}
// Zeroes the entire sample buffer. The read/write indices (m_indexR / m_indexW)
// are not touched here, so the amount of data considered queued is unchanged.
void StereoResampler::MixerFifo::Clear() {
memset(m_buffer, 0, sizeof(m_buffer));
}
// Executed from sound stream thread
unsigned int StereoResampler::MixerFifo::Mix(short* samples, unsigned int numSamples, bool consider_framelimit, int sample_rate) {
unsigned int currentSample = 0;
// Cache access in non-volatile variable
// This is the only function changing the read value, so it's safe to
// cache it locally although it's written here.
// The writing pointer will be modified outside, but it will only increase,
// so we will just ignore new written data while interpolating.
// Without this cache, the compiler wouldn't be allowed to optimize the
// interpolation loop.
u32 indexR = Common::AtomicLoad(m_indexR);
u32 indexW = Common::AtomicLoad(m_indexW);
// Drift prevention mechanism
float numLeft = (float)(((indexW - indexR) & INDEX_MASK) / 2);
m_numLeftI = (numLeft + m_numLeftI*(CONTROL_AVG - 1)) / CONTROL_AVG;
float offset = (m_numLeftI - LOW_WATERMARK) * CONTROL_FACTOR;
if (offset > MAX_FREQ_SHIFT) offset = MAX_FREQ_SHIFT;
if (offset < -MAX_FREQ_SHIFT) offset = -MAX_FREQ_SHIFT;
float aid_sample_rate = m_input_sample_rate + offset;
/* Hm?
u32 framelimit = SConfig::GetInstance().m_Framelimit;
if (consider_framelimit && framelimit > 1) {
aid_sample_rate = aid_sample_rate * (framelimit - 1) * 5 / 59.994;
}*/
const u32 ratio = (u32)(65536.0f * aid_sample_rate / (float)sample_rate);
// TODO: consider a higher-quality resampling algorithm.
// TODO: Add a fast path for 1:1.
for (; currentSample < numSamples * 2 && ((indexW - indexR) & INDEX_MASK) > 2; currentSample += 2) {
u32 indexR2 = indexR + 2; //next sample
s16 l1 = m_buffer[indexR & INDEX_MASK]; //current
s16 r1 = m_buffer[(indexR + 1) & INDEX_MASK]; //current
s16 l2 = m_buffer[indexR2 & INDEX_MASK]; //next
s16 r2 = m_buffer[(indexR2 + 1) & INDEX_MASK]; //next
int sampleL = ((l1 << 16) + (l2 - l1) * (u16)m_frac) >> 16;
int sampleR = ((r1 << 16) + (r2 - r1) * (u16)m_frac) >> 16;
samples[currentSample] = clamp_s16(sampleL); // Do we even need to clamp after interpolation?
samples[currentSample + 1] = clamp_s16(sampleR);
m_frac += ratio;
indexR += 2 * (u16)(m_frac >> 16);
m_frac &= 0xffff;
}
int realSamples = currentSample;
// Padding with the last value to reduce clicking
short s[2];
s[0] = clamp_s16(m_buffer[(indexR - 1) & INDEX_MASK]);
s[1] = clamp_s16(m_buffer[(indexR - 2) & INDEX_MASK]);
for (; currentSample < numSamples * 2; currentSample += 2) {
samples[currentSample] = s[0];
samples[currentSample + 1] = s[1];
}
// Flush cached variable
Common::AtomicStore(m_indexR, indexR);
//if (realSamples != numSamples * 2) {
// ILOG("Underrun! %i / %i", realSamples / 2, numSamples);
//}
return realSamples / 2;
}
// Public mixing entry point. Forwards to the FIFO mixer; when no output
// buffer is supplied, nothing is mixed and 0 is returned.
unsigned int StereoResampler::Mix(short* samples, unsigned int num_samples, bool consider_framelimit, int sample_rate) {
	return samples
		? m_dma_mixer.Mix(samples, num_samples, consider_framelimit, sample_rate)
		: 0;
}
void StereoResampler::MixerFifo::PushSamples(const s32 *samples, unsigned int num_samples) {
// Cache access in non-volatile variable
// indexR isn't allowed to cache in the audio throttling loop as it
// needs to get updates to not deadlock.
u32 indexW = Common::AtomicLoad(m_indexW);
// Check if we have enough free space
// indexW == m_indexR results in empty buffer, so indexR must always be smaller than indexW
if (num_samples * 2 + ((indexW - Common::AtomicLoad(m_indexR)) & INDEX_MASK) >= MAX_SAMPLES * 2)
return;
// AyuanX: Actual re-sampling work has been moved to sound thread
// to alleviate the workload on main thread
// and we simply store raw data here to make fast mem copy
int over_bytes = num_samples * 4 - (MAX_SAMPLES * 2 - (indexW & INDEX_MASK)) * sizeof(short);
if (over_bytes > 0) {
ClampBufferToS16(&m_buffer[indexW & INDEX_MASK], samples, (num_samples * 4 - over_bytes) / 2);
ClampBufferToS16(&m_buffer[0], samples + (num_samples * 4 - over_bytes) / sizeof(short), over_bytes / 2);
} else {
ClampBufferToS16(&m_buffer[indexW & INDEX_MASK], samples, num_samples * 2);
}
Common::AtomicAdd(m_indexW, num_samples * 2);
}
// Public push entry point: hands the samples straight to the internal FIFO
// (m_dma_mixer), which does the actual buffering.
void StereoResampler::PushSamples(const int *samples, unsigned int num_samples) {
m_dma_mixer.PushSamples(samples, num_samples);
}
// Sets the nominal rate of the pushed audio. Mix() reads this value when it
// computes its resampling ratio.
void StereoResampler::MixerFifo::SetInputSampleRate(unsigned int rate) {
m_input_sample_rate = rate;
}
// Savestate hook. Opens the "resampler" section (version 1) and returns early
// if the section is not available. Nothing else is serialized here: neither the
// buffered samples nor the indices are part of the state.
void StereoResampler::DoState(PointerWrap &p) {
auto s = p.Section("resampler", 1);
if (!s)
return;
}

91
Core/HW/StereoResampler.h Normal file
View File

@ -0,0 +1,91 @@
// Copyright (c) 2015- PPSSPP Project and Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
// Adapted from Dolphin.
#pragma once
#include <string>
#include "base/mutex.h"
#include "Common/ChunkFile.h"
#include "Common/CommonTypes.h"
// 16 bit Stereo
#define MAX_SAMPLES (2*(1024 * 2)) // 2*64ms - had to double it for nVidia Shield which has huge buffers
#define INDEX_MASK (MAX_SAMPLES * 2 - 1)
#define LOW_WATERMARK 1280 // 40 ms
#define MAX_FREQ_SHIFT 200 // per 32000 Hz
#define CONTROL_FACTOR 0.2f // in freq_shift per fifo size offset
#define CONTROL_AVG 32
class StereoResampler {
public:
StereoResampler()
: m_dma_mixer(this, 44100)
{
}
virtual ~StereoResampler() {}
// Called from audio threads
virtual unsigned int Mix(short* samples, unsigned int numSamples, bool consider_framelimit, int sampleRate);
// Called from main thread
// This clamps the samples to 16-bit before starting to work on them.
virtual void PushSamples(const s32* samples, unsigned int num_samples);
void Clear() {
m_dma_mixer.Clear();
}
void DoState(PointerWrap &p);
protected:
class MixerFifo {
public:
MixerFifo(StereoResampler *mixer, unsigned sample_rate)
: m_mixer(mixer)
, m_input_sample_rate(sample_rate)
, m_indexW(0)
, m_indexR(0)
, m_numLeftI(0.0f)
, m_frac(0)
{
memset(m_buffer, 0, sizeof(m_buffer));
}
void PushSamples(const s32* samples, unsigned int num_samples);
unsigned int Mix(short* samples, unsigned int numSamples, bool consider_framelimit, int sample_rate);
void SetInputSampleRate(unsigned int rate);
void Clear();
private:
StereoResampler *m_mixer;
unsigned m_input_sample_rate;
short m_buffer[MAX_SAMPLES * 2];
volatile u32 m_indexW;
volatile u32 m_indexR;
float m_numLeftI;
u32 m_frac;
};
MixerFifo m_dma_mixer;
};

View File

@ -428,8 +428,6 @@ void GameSettingsScreen::CreateViews() {
#endif
systemSettings->Add(new CheckBox(&g_Config.bSetRoundingMode, s->T("Respect FPU rounding (disable for old GEB saves)")))->OnClick.Handle(this, &GameSettingsScreen::OnJitAffectingSetting);
systemSettings->Add(new CheckBox(&g_Config.bAtomicAudioLocks, s->T("Atomic Audio locks (experimental)")))->SetEnabled(!PSP_IsInited());
systemSettings->Add(new ItemHeader(s->T("Developer Tools")));
systemSettings->Add(new Choice(s->T("Developer Tools")))->OnClick.Handle(this, &GameSettingsScreen::OnDeveloperTools);

View File

@ -142,6 +142,10 @@ static std::vector<PendingMessage> pendingMessages;
static Thin3DContext *thin3d;
static UIContext *uiContext;
#ifdef _WIN32
WindowsAudioBackend *winAudioBackend;
#endif
Thin3DContext *GetThin3D() {
return thin3d;
}
@ -219,20 +223,23 @@ std::string NativeQueryConfig(std::string query) {
sprintf(temp, "%i", scale);
return std::string(temp);
} else if (query == "force44khz") {
return std::string("0");
} else {
return std::string("");
return "";
}
}
int NativeMix(short *audio, int num_samples) {
if (GetUIState() == UISTATE_INGAME) {
num_samples = __AudioMix(audio, num_samples);
int sample_rate = System_GetPropertyInt(SYSPROP_AUDIO_SAMPLE_RATE);
num_samples = __AudioMix(audio, num_samples, sample_rate > 0 ? sample_rate : 44100);
} else {
MixBackgroundAudio(audio, num_samples);
}
#ifdef _WIN32
DSound_UpdateSound();
winAudioBackend->Update();
#endif
return num_samples;
@ -568,13 +575,15 @@ void NativeInitGraphics() {
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
#ifdef _WIN32
DSound_StartSound(MainWindow::GetHWND(), &Win32Mix, 44100);
winAudioBackend = CreateAudioBackend(AUDIO_BACKEND_AUTO);
winAudioBackend->Init(MainWindow::GetHWND(), &Win32Mix, 44100);
#endif
}
void NativeShutdownGraphics() {
#ifdef _WIN32
DSound_StopSound();
delete winAudioBackend;
winAudioBackend = NULL;
#endif
screenManager->deviceLost();

View File

@ -8,97 +8,89 @@
#define BUFSIZE 0x4000
#define MAXWAIT 20 //ms
class DSoundState {
class DSoundAudioBackend : public WindowsAudioBackend {
public:
DSoundState(HWND window, StreamCallback _callback, int sampleRate);
bool Init(); // If fails, can safely delete the object
DSoundAudioBackend();
~DSoundAudioBackend() override;
bool createBuffer();
bool writeDataToBuffer(DWORD dwOffset, // Our own write cursor.
char* soundData, // Start of our data.
DWORD dwSoundBytes); // Size of block to copy.
inline int ModBufferSize(int x) { return (x + bufferSize) % bufferSize; }
int RunThread();
void UpdateSound();
void StopSound();
int GetCurSample();
int GetSampleRate() { return sampleRate; }
bool Init(HWND window, StreamCallback callback, int sampleRate) override; // If fails, can safely delete the object
void Update() override;
int GetSampleRate() override { return sampleRate_; }
private:
inline int ModBufferSize(int x) { return (x + bufferSize_) % bufferSize_; }
int RunThread();
static unsigned int WINAPI soundThread(void *param);
bool CreateBuffer();
bool WriteDataToBuffer(DWORD offset, // Our own write cursor.
char* soundData, // Start of our data.
DWORD soundBytes); // Size of block to copy.
CRITICAL_SECTION soundCriticalSection;
HWND window_;
HANDLE soundSyncEvent = NULL;
HANDLE hThread = NULL;
HANDLE soundSyncEvent_ = NULL;
HANDLE hThread_ = NULL;
StreamCallback callback;
StreamCallback callback_;
IDirectSound8 *ds = NULL;
IDirectSoundBuffer *dsBuffer = NULL;
IDirectSound8 *ds_ = NULL;
IDirectSoundBuffer *dsBuffer_ = NULL;
int bufferSize; // bytes
int totalRenderedBytes;
int sampleRate;
int bufferSize_; // bytes
int totalRenderedBytes_;
int sampleRate_;
volatile int threadData;
volatile int threadData_;
int currentPos;
int lastPos;
short realtimeBuffer[BUFSIZE * 2];
int currentPos_;
int lastPos_;
short realtimeBuffer_[BUFSIZE * 2];
};
// TODO: Get rid of this
static DSoundState *g_dsound;
static DSoundAudioBackend *g_dsound;
inline int RoundDown128(int x) {
return x & (~127);
}
int DSound_GetSampleRate() {
if (g_dsound) {
return g_dsound->GetSampleRate();
} else {
return 0;
}
}
bool DSoundState::createBuffer() {
bool DSoundAudioBackend::CreateBuffer() {
PCMWAVEFORMAT pcmwf;
DSBUFFERDESC dsbdesc;
memset(&pcmwf, 0, sizeof(PCMWAVEFORMAT));
memset(&dsbdesc, 0, sizeof(DSBUFFERDESC));
bufferSize = BUFSIZE;
bufferSize_ = BUFSIZE;
pcmwf.wf.wFormatTag = WAVE_FORMAT_PCM;
pcmwf.wf.nChannels = 2;
pcmwf.wf.nSamplesPerSec = sampleRate;
pcmwf.wf.nSamplesPerSec = sampleRate_;
pcmwf.wf.nBlockAlign = 4;
pcmwf.wf.nAvgBytesPerSec = pcmwf.wf.nSamplesPerSec * pcmwf.wf.nBlockAlign;
pcmwf.wBitsPerSample = 16;
dsbdesc.dwSize = sizeof(DSBUFFERDESC);
dsbdesc.dwFlags = DSBCAPS_GETCURRENTPOSITION2 | DSBCAPS_GLOBALFOCUS; // //DSBCAPS_CTRLPAN | DSBCAPS_CTRLVOLUME | DSBCAPS_CTRLFREQUENCY;
dsbdesc.dwBufferBytes = bufferSize; //FIX32(pcmwf.wf.nAvgBytesPerSec); //change to set buffer size
dsbdesc.dwBufferBytes = bufferSize_; //FIX32(pcmwf.wf.nAvgBytesPerSec); //change to set buffer size
dsbdesc.lpwfxFormat = (WAVEFORMATEX *)&pcmwf;
if (SUCCEEDED(ds->CreateSoundBuffer(&dsbdesc, &dsBuffer, NULL))) {
dsBuffer->SetCurrentPosition(0);
if (SUCCEEDED(ds_->CreateSoundBuffer(&dsbdesc, &dsBuffer_, NULL))) {
dsBuffer_->SetCurrentPosition(0);
return true;
} else {
dsBuffer = NULL;
dsBuffer_ = NULL;
return false;
}
}
bool DSoundState::writeDataToBuffer(DWORD dwOffset, // Our own write cursor.
bool DSoundAudioBackend::WriteDataToBuffer(DWORD offset, // Our own write cursor.
char* soundData, // Start of our data.
DWORD dwSoundBytes) { // Size of block to copy.
DWORD soundBytes) { // Size of block to copy.
void *ptr1, *ptr2;
DWORD numBytes1, numBytes2;
// Obtain memory address of write block. This will be in two parts if the block wraps around.
HRESULT hr = dsBuffer->Lock(dwOffset, dwSoundBytes, &ptr1, &numBytes1, &ptr2, &numBytes2, 0);
HRESULT hr = dsBuffer_->Lock(offset, soundBytes, &ptr1, &numBytes1, &ptr2, &numBytes2, 0);
// If the buffer was lost, restore and retry lock.
/*
@ -112,7 +104,7 @@ bool DSoundState::writeDataToBuffer(DWORD dwOffset, // Our own write cursor.
memcpy(ptr2, soundData+numBytes1, numBytes2);
// Release the data back to DirectSound.
dsBuffer->Unlock(ptr1, numBytes1, ptr2, numBytes2);
dsBuffer_->Unlock(ptr1, numBytes1, ptr2, numBytes2);
return true;
}/*
else
@ -124,156 +116,122 @@ bool DSoundState::writeDataToBuffer(DWORD dwOffset, // Our own write cursor.
return false;
}
unsigned int WINAPI soundThread(void *param) {
DSoundState *state = (DSoundState *)param;
unsigned int WINAPI DSoundAudioBackend::soundThread(void *param) {
DSoundAudioBackend *state = (DSoundAudioBackend *)param;
return state->RunThread();
}
// Body of the sound thread. Starts looping playback of the DirectSound buffer,
// then repeatedly asks the mixer callback for audio and copies it into the part
// of the buffer between the last write position and the current play position.
// Runs until threadData becomes non-zero, then stops playback, sets threadData
// to 2 and returns 0.
int DSoundState::RunThread() {
int DSoundAudioBackend::RunThread() {
setCurrentThreadName("DSound");
// Both cursors start at the beginning of the buffer.
currentPos = 0;
lastPos = 0;
currentPos_ = 0;
lastPos_ = 0;
//writeDataToBuffer(0,realtimeBuffer,bufferSize);
// dsBuffer->Lock(0, bufferSize, (void **)&p1, &num1, (void **)&p2, &num2, 0);
dsBuffer->Play(0,0,DSBPLAY_LOOPING);
dsBuffer_->Play(0,0,DSBPLAY_LOOPING);
// Any non-zero threadData ends the loop; the teardown code sets it to 1.
while (!threadData) {
while (!threadData_) {
EnterCriticalSection(&soundCriticalSection);
// Distance from the last write position to the play position, wrapped to
// the buffer size and rounded down by RoundDown128.
dsBuffer->GetCurrentPosition((DWORD *)&currentPos, 0);
int numBytesToRender = RoundDown128(ModBufferSize(currentPos - lastPos));
dsBuffer_->GetCurrentPosition((DWORD *)&currentPos_, 0);
int numBytesToRender = RoundDown128(ModBufferSize(currentPos_ - lastPos_));
// Smaller gaps are left to accumulate until a later iteration.
if (numBytesToRender >= 256) {
// The callback takes a sample count (bytes >> 2) and is asked for 16-bit,
// 2-channel audio, i.e. 4 bytes per sample; its return value is scaled
// back to bytes.
// NOTE(review): the rate passed here is the literal 44100, not the
// sampleRate value given at construction/Init - confirm that is intended.
int numBytesRendered = 4 * (*callback)(realtimeBuffer, numBytesToRender >> 2, 16, 44100, 2);
int numBytesRendered = 4 * (*callback_)(realtimeBuffer_, numBytesToRender >> 2, 16, 44100, 2);
//We need to copy the full buffer, regardless of what the mixer claims to have filled
//If we don't do this then the sound will loop if the sound stops and the mixer writes only zeroes
numBytesRendered = numBytesToRender;
writeDataToBuffer(lastPos, (char *) realtimeBuffer, numBytesRendered);
WriteDataToBuffer(lastPos_, (char *) realtimeBuffer_, numBytesRendered);
// Advance the write position (wrapping) and the running byte total.
currentPos = ModBufferSize(lastPos + numBytesRendered);
totalRenderedBytes += numBytesRendered;
currentPos_ = ModBufferSize(lastPos_ + numBytesRendered);
totalRenderedBytes_ += numBytesRendered;
lastPos = currentPos;
lastPos_ = currentPos_;
}
LeaveCriticalSection(&soundCriticalSection);
// Sleep until the sync event is signalled or MAXWAIT elapses.
WaitForSingleObject(soundSyncEvent, MAXWAIT);
WaitForSingleObject(soundSyncEvent_, MAXWAIT);
}
dsBuffer->Stop();
dsBuffer_->Stop();
// 2 marks the thread as finished; the teardown code checks for this value
// before it releases the DirectSound objects.
threadData = 2;
threadData_ = 2;
return 0;
}
// Constructor. Only stores/initializes members; no DirectSound objects are
// created here. The DSoundAudioBackend form starts with threadData_ at 0 and
// ds_ at null, and receives window, callback and sample rate later via Init.
DSoundState::DSoundState(HWND window, StreamCallback _callback, int sampleRate)
: window_(window), callback(_callback), sampleRate(sampleRate) {
callback = _callback;
threadData=0;
DSoundAudioBackend::DSoundAudioBackend() : threadData_(0), ds_(nullptr) {
}
// Creates the sync event and critical section, opens the DirectSound device,
// creates the sound buffer, clears it to silence and starts the sound thread.
// Returns false if the device or the buffer could not be created.
bool DSoundState::Init() {
soundSyncEvent = CreateEvent(0, false, false, 0);
InitializeCriticalSection(&soundCriticalSection);
// The destructor has nothing to tear down when no device was ever created.
DSoundAudioBackend::~DSoundAudioBackend() {
if (!ds_)
return;
// Device creation failed: undo the event and critical section set up above.
if (FAILED(DirectSoundCreate8(0,&ds,0))) {
CloseHandle(soundSyncEvent);
DeleteCriticalSection(&soundCriticalSection);
return false;
}
ds->SetCooperativeLevel(window_, DSSCL_PRIORITY);
// NOTE(review): this failure path returns without the cleanup done on the
// path above - confirm whether the caller is expected to handle that.
if (!createBuffer())
return false;
// Zero the buffer before playback starts.
DWORD num1;
short *p1;
dsBuffer->Lock(0, bufferSize, (void **)&p1, &num1, 0, 0, 0);
memset(p1,0,num1);
dsBuffer->Unlock(p1,num1,0,0);
// The byte counter starts one buffer length below zero.
totalRenderedBytes = -bufferSize;
// Start the sound thread with this object as its parameter and raise its priority.
hThread = (HANDLE)_beginthreadex(0, 0, soundThread, (void *)this, 0, 0);
SetThreadPriority(hThread, THREAD_PRIORITY_ABOVE_NORMAL);
return true;
}
// Signals the sync event the sound thread waits on, so the thread wakes up
// without sitting out its full wait timeout. Does nothing if the event
// handle is NULL.
void DSoundState::UpdateSound() {
	if (soundSyncEvent == NULL)
		return;

	SetEvent(soundSyncEvent);
}
// Teardown: asks the sound thread to exit, waits for it, releases the
// DirectSound buffer and device if the thread reported completion, and
// destroys the sync event and critical section.
void DSoundState::StopSound() {
// Nothing to tear down if no buffer exists.
if (!dsBuffer)
if (!dsBuffer_)
return;
EnterCriticalSection(&soundCriticalSection);
// 0 -> 1 requests that the sound thread leave its loop.
if (threadData == 0) {
threadData = 1;
if (threadData_ == 0) {
threadData_ = 1;
}
// Give the thread up to one second to finish, then drop the handle.
// NOTE(review): the critical section is still held during this wait, and the
// sound thread enters the same critical section on every loop iteration;
// the sync event is also not signalled here. Confirm the thread can actually
// reach its exit within the timeout.
if (hThread != NULL) {
WaitForSingleObject(hThread, 1000);
CloseHandle(hThread);
hThread = NULL;
if (hThread_ != NULL) {
WaitForSingleObject(hThread_, 1000);
CloseHandle(hThread_);
hThread_ = NULL;
}
// The DirectSound objects are released only if the thread set threadData to 2,
// i.e. it has stopped playback and returned from its loop.
if (threadData == 2) {
if (dsBuffer != NULL)
dsBuffer->Release();
dsBuffer = NULL;
if (ds != NULL)
ds->Release();
ds = NULL;
if (threadData_ == 2) {
if (dsBuffer_ != NULL)
dsBuffer_->Release();
dsBuffer_ = NULL;
if (ds_ != NULL)
ds_->Release();
ds_ = NULL;
}
// Close the sync event and clear the handle so later signalling is skipped.
if (soundSyncEvent != NULL) {
CloseHandle(soundSyncEvent);
if (soundSyncEvent_ != NULL) {
CloseHandle(soundSyncEvent_);
}
soundSyncEvent = NULL;
soundSyncEvent_ = NULL;
LeaveCriticalSection(&soundCriticalSection);
DeleteCriticalSection(&soundCriticalSection);
}
// Returns a running byte position: the wrapped distance from the last write
// position to the current play cursor, plus the total number of bytes rendered
// so far. The shared state is read under the sound critical section.
int DSoundState::GetCurSample() {
	int cursorBytes;

	EnterCriticalSection(&soundCriticalSection);
	dsBuffer->GetCurrentPosition((DWORD *)&cursorBytes, 0);
	cursorBytes = ModBufferSize(cursorBytes - lastPos) + totalRenderedBytes;
	LeaveCriticalSection(&soundCriticalSection);

	return cursorBytes;
}
// Forwards to the global DSound state's UpdateSound(); a no-op while no
// state object exists.
void DSound_UpdateSound() {
	if (!g_dsound)
		return;

	g_dsound->UpdateSound();
}
bool DSound_StartSound(HWND window, StreamCallback _callback, int sampleRate) {
g_dsound = new DSoundState(window, _callback, sampleRate);
if (!g_dsound->Init()) {
delete g_dsound;
g_dsound = NULL;
// Opens the DirectSound device, creates the sound buffer, clears it to
// silence and starts the sound thread.
//
//   window      Window passed to SetCooperativeLevel.
//   _callback   Mixer callback the sound thread calls to fetch audio.
//   sampleRate  Stored in sampleRate_.
//
// Returns false if the device or the buffer could not be created. On both
// failure paths ds_ is left NULL, so the destructor's `!ds_` early-out applies
// and no teardown is attempted on state that was never set up.
bool DSoundAudioBackend::Init(HWND window, StreamCallback _callback, int sampleRate) {
	window_ = window;
	callback_ = _callback;
	sampleRate_ = sampleRate;
	threadData_ = 0;

	if (FAILED(DirectSoundCreate8(0, &ds_, 0))) {
		ds_ = NULL;
		return false;
	}

	ds_->SetCooperativeLevel(window_, DSSCL_PRIORITY);
	if (!CreateBuffer()) {
		// Release the device here: the teardown code only releases it after the
		// sound thread has run to completion, and that thread is never started
		// on this path. The event and critical section do not exist yet either.
		ds_->Release();
		ds_ = NULL;
		return false;
	}

	soundSyncEvent_ = CreateEvent(0, false, false, 0);
	InitializeCriticalSection(&soundCriticalSection);

	// Start from silence. Only touch the memory if the lock actually succeeded;
	// otherwise p1/num1 carry no valid region.
	DWORD num1 = 0;
	short *p1 = NULL;
	if (SUCCEEDED(dsBuffer_->Lock(0, bufferSize_, (void **)&p1, &num1, 0, 0, 0)) && p1 != NULL) {
		memset(p1, 0, num1);
		dsBuffer_->Unlock(p1, num1, 0, 0);
	}

	// The byte counter starts one buffer length below zero.
	totalRenderedBytes_ = -bufferSize_;

	hThread_ = (HANDLE)_beginthreadex(0, 0, soundThread, (void *)this, 0, 0);
	// _beginthreadex returns 0 on failure; don't pass a null handle on.
	if (hThread_ != NULL)
		SetThreadPriority(hThread_, THREAD_PRIORITY_ABOVE_NORMAL);

	return true;
}
void DSound_StopSound() {
g_dsound->StopSound();
delete g_dsound;
g_dsound = NULL;
// Signals the sync event the sound thread waits on, so the thread wakes up
// without sitting out its full wait timeout. Does nothing if the event
// handle is NULL.
void DSoundAudioBackend::Update() {
	if (soundSyncEvent_ == NULL)
		return;

	SetEvent(soundSyncEvent_);
}
// Free-function wrapper around the global DSound state's GetCurSample().
// g_dsound is dereferenced without a null check.
int DSound_GetCurSample() {
	const int curSample = g_dsound->GetCurSample();
	return curSample;
}
// Converts the current position reported by GetCurSample() to a float by
// dividing by 4 * 44100 (the sound thread requests 16-bit, 2-channel audio at
// 44100, i.e. 4 bytes per sample). g_dsound is dereferenced without a null check.
float DSound_GetTimer() {
	const float scale = 1.0f / (4.0f * 44100.0f);
	const float position = (float)g_dsound->GetCurSample();
	return position * scale;
}
// Factory for the Windows audio backend. A DirectSound backend is constructed
// regardless of `type`. The returned object is allocated with new; the caller
// receives the raw pointer.
WindowsAudioBackend *CreateAudioBackend(AudioBackendType type) {
	WindowsAudioBackend *backend = new DSoundAudioBackend();
	return backend;
}

View File

@ -1,16 +1,23 @@
#ifndef __SOUNDSTREAM_H__
#define __SOUNDSTREAM_H__
#pragma once
#include "Common/CommonWindows.h"
typedef int (*StreamCallback)(short *buffer, int numSamples, int bits, int rate, int channels);
bool DSound_StartSound(HWND window, StreamCallback _callback, int sampleRate);
void DSound_UpdateSound();
void DSound_StopSound();
// Abstract interface for a Windows audio output backend.
class WindowsAudioBackend {
public:
	WindowsAudioBackend() {
	}

	// Virtual so a backend can be destroyed through a base-class pointer.
	virtual ~WindowsAudioBackend() {
	}

	// Sets the backend up with the given window, mixer callback and sample
	// rate. Returns whether initialization succeeded.
	virtual bool Init(HWND window, StreamCallback _callback, int sampleRate) = 0;

	// Optional hook; the default implementation is empty, so a backend only
	// overrides it if it has something to do.
	virtual void Update() {
	}

	// Reports the backend's sample rate.
	virtual int GetSampleRate() = 0;
};
float DSound_GetTimer();
int DSound_GetCurSample();
int DSound_GetSampleRate();
#endif //__SOUNDSTREAM_H__
// Backend selector accepted by the CreateAudioBackend factory.
enum AudioBackendType {
	AUDIO_BACKEND_DSOUND = 0,
	// AUDIO_BACKEND_WASAPI,  // TODO
	AUDIO_BACKEND_AUTO
};
// Factory
WindowsAudioBackend *CreateAudioBackend(AudioBackendType type);

View File

@ -126,9 +126,13 @@ void WindowsHost::InitSound()
{
}
// UGLY!
extern WindowsAudioBackend *winAudioBackend;
// Host hook that forwards a sound update to the audio backend.
void WindowsHost::UpdateSound()
{
DSound_UpdateSound();
// winAudioBackend is an extern global pointer; skip the call while it is null.
if (winAudioBackend)
winAudioBackend->Update();
}
void WindowsHost::ShutdownSound()

View File

@ -256,10 +256,13 @@ std::string System_GetProperty(SystemProperty prop) {
}
}
// Ugly!
extern WindowsAudioBackend *winAudioBackend;
int System_GetPropertyInt(SystemProperty prop) {
switch (prop) {
case SYSPROP_AUDIO_SAMPLE_RATE:
return DSound_GetSampleRate();
return winAudioBackend ? winAudioBackend->GetSampleRate() : -1;
default:
return -1;
}

View File

@ -187,6 +187,7 @@ EXEC_AND_LIB_FILES := \
$(SRC)/Core/HW/MpegDemux.cpp.arm \
$(SRC)/Core/HW/MediaEngine.cpp.arm \
$(SRC)/Core/HW/SasAudio.cpp.arm \
$(SRC)/Core/HW/StereoResampler.cpp.arm \
$(SRC)/Core/Core.cpp \
$(SRC)/Core/Config.cpp \
$(SRC)/Core/CoreTiming.cpp \

View File

@ -22,7 +22,14 @@ std::string System_GetProperty(SystemProperty prop) {
}
}
// No integer properties are answered here: every query gets -1.
int System_GetPropertyInt(SystemProperty prop) {
	return -1;
}
// Integer system properties. Only the audio sample rate is answered, with a
// fixed 44100; any other property yields -1.
int System_GetPropertyInt(SystemProperty prop) {
	if (prop == SYSPROP_AUDIO_SAMPLE_RATE)
		return 44100;

	return -1;
}
void System_SendMessage(const char *command, const char *parameter) {
if (!strcmp(command, "finish")) {

2
native

@ -1 +1 @@
Subproject commit 1b5bf730caffa702de34a6db9f0ae93fbbd88eed
Subproject commit 84ed01df53cd557eb126c1f6badef7d9201add5f