Merge pull request #7315 from hrydgard/audio-resample

Use Dolphin's audio resampler to lower latency on Android and prevent drift
2025-03-05 06:59:38 +00:00 · 2015-01-11 21:10:08 +01:00 · 2015-01-11 21:10:08 +01:00 · 82ccce9eca
commit 82ccce9eca
parent 1b055fd07e c8dfcd5e74
20 changed files with 553 additions and 399 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -1276,6 +1276,8 @@ add_library(${CoreLibName} ${CoreLinkType}
 	Core/HW/MemoryStick.h
 	Core/HW/SasAudio.cpp
 	Core/HW/SasAudio.h
+	Core/HW/StereoResampler.cpp
+	Core/HW/StereoResampler.h
 	Core/Host.cpp
 	Core/Host.h
 	Core/Loaders.cpp
--- a/Common/Atomic_GCC.h
+++ b/Common/Atomic_GCC.h
@ -1,29 +1,12 @@
-// Copyright (C) 2003 Dolphin Project.
+// Copyright 2013 Dolphin Emulator Project
+// Licensed under GPLv2
+// Refer to the license.txt file included.

-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU General Public License as published by
-// the Free Software Foundation, version 2.0 or later versions.
-
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License 2.0 for more details.
-
-// A copy of the GPL 2.0 should have been included with the program.
-// If not, see http://www.gnu.org/licenses/
-
-// Official SVN repository and contact information can be found at
-// http://code.google.com/p/dolphin-emu/
+// IWYU pragma: private, include "Common/Atomic.h"

 #pragma once

-#ifdef BLACKBERRY
-#include <atomic.h>
-#elif defined(__SYMBIAN32__)
-#include <e32atomics.h>
-#endif
-
-#include "Common.h"
+#include "CommonTypes.h"

 // Atomic operations are performed in a single step by the CPU. It is
 // impossible for other threads to see the operation "half-done."
@ -42,53 +25,78 @@
 namespace Common
 {

-inline void AtomicAdd(volatile u32& target, u32 value) {
+inline void AtomicAdd(volatile u32& target, u32 value)
+{
 	__sync_add_and_fetch(&target, value);
 }

-inline void AtomicAnd(volatile u32& target, u32 value) {
+inline void AtomicAnd(volatile u32& target, u32 value)
+{
 	__sync_and_and_fetch(&target, value);
 }

-inline void AtomicDecrement(volatile u32& target) {
+inline void AtomicDecrement(volatile u32& target)
+{
 	__sync_add_and_fetch(&target, -1);
 }

-inline void AtomicIncrement(volatile u32& target) {
+inline void AtomicIncrement(volatile u32& target)
+{
 	__sync_add_and_fetch(&target, 1);
 }

-inline u32 AtomicLoad(volatile u32& src) {
-	return src; // 32-bit reads are always atomic.
-}
-inline u32 AtomicLoadAcquire(volatile u32& src) {
-#ifdef __SYMBIAN32__
-	return __e32_atomic_load_acq32(&src);
-#else
-	//keep the compiler from caching any memory references
-	u32 result = src; // 32-bit reads are always atomic.
-	//__sync_synchronize(); // TODO: May not be necessary.
-	// Compiler instruction only. x86 loads always have acquire semantics.
-	__asm__ __volatile__ ( "":::"memory" );
-	return result;
-#endif
-}
-
-inline void AtomicOr(volatile u32& target, u32 value) {
+inline void AtomicOr(volatile u32& target, u32 value)
+{
 	__sync_or_and_fetch(&target, value);
 }

-inline void AtomicStore(volatile u32& dest, u32 value) {
-	dest = value; // 32-bit writes are always atomic.
+// Support clang versions older than 3.4.
+#if __clang__ 
+#if !__has_feature(cxx_atomic)
+template <typename T>
+_Atomic(T)* ToC11Atomic(volatile T* loc)
+{
+	return (_Atomic(T)*) loc;
 }
-inline void AtomicStoreRelease(volatile u32& dest, u32 value) {
-#ifdef BLACKBERRY
-	atomic_set(&dest, value);
-#elif defined(__SYMBIAN32__)
-	__e32_atomic_store_rel32(&dest, value);
-#else
-	__sync_lock_test_and_set(&dest, value); // TODO: Wrong! This function has acquire semantics.
+
+#define __atomic_load_n(p, m) __c11_atomic_load(ToC11Atomic(p), m)
+#define __atomic_store_n(p, v, m) __c11_atomic_store(ToC11Atomic(p), v, m)
+#define __atomic_exchange_n(p, v, m) __c11_atomic_exchange(ToC11Atomic(p), v, m)
 #endif
+#endif
+
+#ifndef __ATOMIC_RELAXED
+#error __ATOMIC_RELAXED not defined; your compiler version is too old.
+#endif
+
+template <typename T>
+inline T AtomicLoad(volatile T& src)
+{
+	return __atomic_load_n(&src, __ATOMIC_RELAXED);
+}
+
+template <typename T>
+inline T AtomicLoadAcquire(volatile T& src)
+{
+	return __atomic_load_n(&src, __ATOMIC_ACQUIRE);
+}
+
+template <typename T, typename U>
+inline void AtomicStore(volatile T& dest, U value)
+{
+	__atomic_store_n(&dest, value, __ATOMIC_RELAXED);
+}
+
+template <typename T, typename U>
+inline void AtomicStoreRelease(volatile T& dest, U value)
+{
+	__atomic_store_n(&dest, value, __ATOMIC_RELEASE);
+}
+
+template <typename T, typename U>
+inline T* AtomicExchangeAcquire(T* volatile& loc, U newval)
+{
+	return __atomic_exchange_n(&loc, newval, __ATOMIC_ACQ_REL);
 }

 }
--- a/Common/Atomic_Win32.h
+++ b/Common/Atomic_Win32.h
@ -1,29 +1,15 @@
-// Copyright (C) 2003 Dolphin Project.
+// Copyright 2013 Dolphin Emulator Project
+// Licensed under GPLv2
+// Refer to the license.txt file included.

-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU General Public License as published by
-// the Free Software Foundation, version 2.0 or later versions.
-
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License 2.0 for more details.
-
-// A copy of the GPL 2.0 should have been included with the program.
-// If not, see http://www.gnu.org/licenses/
-
-// Official SVN repository and contact information can be found at
-// http://code.google.com/p/dolphin-emu/
+// IWYU pragma: private, include "Common/Atomic.h"

 #pragma once

-#include "Common.h"
-#ifndef _XBOX
 #include <intrin.h>
-#else
-#include <ppcintrinsics.h>
-#endif
-#include "CommonWindows.h"
+#include <Windows.h>
+
+#include "CommonTypes.h"

 // Atomic operations are performed in a single step by the CPU. It is
 // impossible for other threads to see the operation "half-done."
@ -46,41 +32,66 @@
 namespace Common
 {

-inline void AtomicAdd(volatile u32& target, u32 value) {
-	InterlockedExchangeAdd((volatile LONG*)&target, (LONG)value);
+inline void AtomicAdd(volatile u32& target, u32 value)
+{
+	_InterlockedExchangeAdd((volatile LONG*)&target, (LONG)value);
 }

-inline void AtomicAnd(volatile u32& target, u32 value) {
+inline void AtomicAnd(volatile u32& target, u32 value)
+{
 	_InterlockedAnd((volatile LONG*)&target, (LONG)value);
 }

-inline void AtomicIncrement(volatile u32& target) {
-	InterlockedIncrement((volatile LONG*)&target);
+inline void AtomicIncrement(volatile u32& target)
+{
+	_InterlockedIncrement((volatile LONG*)&target);
 }

-inline void AtomicDecrement(volatile u32& target) {
-	InterlockedDecrement((volatile LONG*)&target);
+inline void AtomicDecrement(volatile u32& target)
+{
+	_InterlockedDecrement((volatile LONG*)&target);
 }

-inline u32 AtomicLoad(volatile u32& src) {
+inline void AtomicOr(volatile u32& target, u32 value)
+{
+	_InterlockedOr((volatile LONG*)&target, (LONG)value);
+}
+
+// For the comment below to hold, better only use this with 32-bit types..
+template <typename T>
+inline T AtomicLoad(volatile T& src)
+{
 	return src; // 32-bit reads are always atomic.
 }
-inline u32 AtomicLoadAcquire(volatile u32& src) {
-	u32 result = src; // 32-bit reads are always atomic.
+
+// For the comment below to hold, better only use this with 32-bit types..
+template <typename T>
+inline T AtomicLoadAcquire(volatile T& src)
+{
+	T result = src; // 32-bit reads are always atomic.
 	_ReadBarrier(); // Compiler instruction only. x86 loads always have acquire semantics.
 	return result;
 }

-inline void AtomicOr(volatile u32& target, u32 value) {
-	_InterlockedOr((volatile LONG*)&target, (LONG)value);
+// For the comment below to hold, better only use this with 32-bit types..
+template <typename T, typename U>
+inline void AtomicStore(volatile T& dest, U value)
+{
+	dest = (T)value; // 32-bit writes are always atomic.
 }

-inline void AtomicStore(volatile u32& dest, u32 value) {
-	dest = value; // 32-bit writes are always atomic.
-}
-inline void AtomicStoreRelease(volatile u32& dest, u32 value) {
+// For the comment below to hold, better only use this with 32-bit types..
+template <typename T, typename U>
+inline void AtomicStoreRelease(volatile T& dest, U value)
+{
 	_WriteBarrier(); // Compiler instruction only. x86 stores always have release semantics.
-	dest = value; // 32-bit writes are always atomic.
+	dest = (T)value; // 32-bit writes are always atomic
+}
+
+template <typename T, typename U>
+inline T* AtomicExchangeAcquire(T* volatile& loc, U newval)
+{
+	return (T*)_InterlockedExchangePointer_acq((void* volatile*)&loc, (void*)newval);
 }

 }
--- a/Core/Config.cpp
+++ b/Core/Config.cpp
@ -334,8 +334,6 @@ static bool DefaultForceFlushToZero() {
 static ConfigSetting cpuSettings[] = {
 	ReportedConfigSetting("Jit", &g_Config.bJit, &DefaultJit, true, true),
 	ReportedConfigSetting("SeparateCPUThread", &g_Config.bSeparateCPUThread, false, true, true),
-	ConfigSetting("AtomicAudioLocks", &g_Config.bAtomicAudioLocks, false, true, true),
-
 	ReportedConfigSetting("SeparateIOThread", &g_Config.bSeparateIOThread, true, true, true),
 	ReportedConfigSetting("IOTimingMethod", &g_Config.iIOTimingMethod, IOTIMING_FAST, true, true),
 	ConfigSetting("FastMemoryAccess", &g_Config.bFastMemory, true, true, true),
--- a/Core/Config.h
+++ b/Core/Config.h
@ -118,7 +118,6 @@ public:
 	bool bSeparateCPUThread;
 	int iIOTimingMethod;
 	bool bSeparateIOThread;
-	bool bAtomicAudioLocks;
 	int iLockedCPUSpeed;
 	bool bAutoSaveSymbolMap;
 	bool bCacheFullIsoInRam;
--- a/Core/Core.vcxproj
+++ b/Core/Core.vcxproj
@ -274,6 +274,7 @@
    <ClCompile Include="HW\SasAudio.cpp" />
    <ClCompile Include="HW\AsyncIOManager.cpp" />
    <ClCompile Include="HW\SimpleAudioDec.cpp" />
+    <ClCompile Include="HW\StereoResampler.cpp" />
    <ClCompile Include="Loaders.cpp" />
    <ClCompile Include="MemMap.cpp" />
    <ClCompile Include="MemmapFunctions.cpp" />
@ -512,6 +513,7 @@
    <ClInclude Include="HW\MemoryStick.h" />
    <ClInclude Include="HW\AsyncIOManager.h" />
    <ClInclude Include="HW\SimpleAudioDec.h" />
+    <ClInclude Include="HW\StereoResampler.h" />
    <ClInclude Include="Loaders.h" />
    <ClInclude Include="MemMap.h" />
    <ClInclude Include="MIPS\ARM\ArmAsm.h">
--- a/Core/Core.vcxproj.filters
+++ b/Core/Core.vcxproj.filters
@ -300,6 +300,9 @@
    <ClCompile Include="HW\MediaEngine.cpp">
      <Filter>HW</Filter>
    </ClCompile>
+    <ClCompile Include="HW\StereoResampler.cpp">
+      <Filter>HW</Filter>
+    </ClCompile>
    <ClCompile Include="Util\PPGeDraw.cpp">
      <Filter>Util</Filter>
    </ClCompile>
@ -775,6 +778,9 @@
    <ClInclude Include="HW\MediaEngine.h">
      <Filter>HW</Filter>
    </ClInclude>
+    <ClInclude Include="HW\StereoResampler.h">
+      <Filter>HW</Filter>
+    </ClInclude>
    <ClInclude Include="Util\PPGeDraw.h">
      <Filter>Util</Filter>
    </ClInclude>
--- a/Core/HLE/__sceAudio.cpp
+++ b/Core/HLE/__sceAudio.cpp
@ -35,7 +35,9 @@
 #include "Core/HLE/sceAudio.h"
 #include "Core/HLE/sceKernel.h"
 #include "Core/HLE/sceKernelThread.h"
+#include "Core/HW/StereoResampler.h"

+StereoResampler resampler;

 // Should be used to lock anything related to the outAudioQueue.
 // atomic locks are used on the lock. TODO: make this lock-free
@ -67,14 +69,6 @@ static s32 *mixBuffer;
 static int chanQueueMaxSizeFactor;
 static int chanQueueMinSizeFactor;

-// TODO: Need to replace this with something lockless. Mutexes in the audio pipeline
-// is bad mojo.
-FixedSizeQueue<s16, 512 * 16> outAudioQueue;
-
-bool __gainAudioQueueLock();
-void __releaseAcquiredLock();
-void __blockForAudioQueueLock();
-
 static inline s16 adjustvolume(s16 sample, int vol) {
 #ifdef ARM
 	register int r;
@ -181,9 +175,7 @@ void __AudioInit() {
 	mixBuffer = new s32[hwBlockSize * 2];
 	memset(mixBuffer, 0, hwBlockSize * 2 * sizeof(s32));

-	__blockForAudioQueueLock();
-	outAudioQueue.clear();
-	__releaseAcquiredLock();
+	resampler.Clear();
 	CoreTiming::RegisterMHzChangeCallback(&__AudioCPUMHzChange);
 }

@ -199,16 +191,14 @@ void __AudioDoState(PointerWrap &p) {

 	p.Do(mixFrequency);

-	{	
-		//block until a lock is achieved. Not a good idea at all, but
-		//can't think of a better one...
-		__blockForAudioQueueLock();
-
+	if (s >= 2) {
+		resampler.DoState(p);
+	} else {
+		// Only to preserve the previous file format. Might cause a slight audio glitch on upgrades?
+		FixedSizeQueue<s16, 512 * 16> outAudioQueue;
 		outAudioQueue.DoState(p);

-		//release the atomic lock
-		__releaseAcquiredLock();
-		
+		resampler.Clear();
 	}

 	int chanCount = ARRAY_SIZE(chans);
@ -358,28 +348,6 @@ void __AudioSetOutputFrequency(int freq) {
 	mixFrequency = freq;
 }

-inline void ClampBufferToS16(s16 *out, s32 *in, size_t size) {
-#ifdef _M_SSE
-	// Size will always be 16-byte aligned as the hwBlockSize is.
-	while (size >= 8) {
-		__m128i in1 = _mm_loadu_si128((__m128i *)in);
-		__m128i in2 = _mm_loadu_si128((__m128i *)(in + 4));
-		__m128i packed = _mm_packs_epi32(in1, in2);
-		_mm_storeu_si128((__m128i *)out, packed);
-		out += 8;
-		in += 8;
-		size -= 8;
-	}
-	for (size_t i = 0; i < size; i++) {
-		out[i] = clamp_s16(in[i]);
-	}
-#else
-	for (size_t i = 0; i < size; i++) {
-		out[i] = clamp_s16(in[i]);
-	}
-#endif
-}
-
 // Mix samples from the various audio channels into a single sample queue.
 // This single sample queue is where __AudioMix should read from. If the sample queue is full, we should
 // just sleep the main emulator thread a little.
@ -433,103 +401,13 @@ void __AudioUpdate() {
 	}

 	if (g_Config.bEnableSound) {
-
-		__blockForAudioQueueLock();
-		/*
-		if (!__gainAudioQueueLock()){
-			return;
-		}
-		*/
-
-		if (outAudioQueue.room() >= hwBlockSize * 2) {
-			s16 *buf1 = 0, *buf2 = 0;
-			size_t sz1, sz2;
-			outAudioQueue.pushPointers(hwBlockSize * 2, &buf1, &sz1, &buf2, &sz2);
-			ClampBufferToS16(buf1, mixBuffer, sz1);
-			if (buf2) {
-				ClampBufferToS16(buf2, mixBuffer + sz1, sz2);
-			}
-		} else {
-			// This happens quite a lot. There's still something slightly off
-			// about the amount of audio we produce.
-		}
-		//release the atomic lock
-		__releaseAcquiredLock();
+		resampler.PushSamples(mixBuffer, hwBlockSize);
 	}
 }

 // numFrames is number of stereo frames.
 // This is called from *outside* the emulator thread.
-int __AudioMix(short *outstereo, int numFrames)
-{
-	// TODO: if mixFrequency != the actual output frequency, resample!
-	int underrun = -1;
-	s16 sampleL = 0;
-	s16 sampleR = 0;
-
-	const s16 *buf1 = 0, *buf2 = 0;
-	size_t sz1, sz2;
-	{
-		//TODO: do rigorous testing to see whether just blind locking will improve speed.
-		if (!__gainAudioQueueLock()){
-			 memset(outstereo, 0, numFrames * 2 * sizeof(short)); 
-			 return 0;
-		}
-		
-		outAudioQueue.popPointers(numFrames * 2, &buf1, &sz1, &buf2, &sz2);
-
-		memcpy(outstereo, buf1, sz1 * sizeof(s16));
-		if (buf2) {
-			memcpy(outstereo + sz1, buf2, sz2 * sizeof(s16));
-		}
-
-		//release the atomic lock
-		__releaseAcquiredLock();
-	}
-
-	int remains = (int)(numFrames * 2 - sz1 - sz2);
-	if (remains > 0)
-		memset(outstereo + numFrames * 2 - remains, 0, remains*sizeof(s16));
-
-	if (sz1 + sz2 < (size_t)numFrames) {
-		underrun = (int)(sz1 + sz2) / 2;
-		VERBOSE_LOG(SCEAUDIO, "Audio out buffer UNDERRUN at %i of %i", underrun, numFrames);
-	}
-	return underrun >= 0 ? underrun : numFrames;
-}
-
-
-
-/*returns whether the lock was successfully gained or not.
-i.e - whether the lock belongs to you 
-*/
-inline bool __gainAudioQueueLock(){
-	if (g_Config.bAtomicAudioLocks){
-		/*if the previous state was 0, that means the lock was "unlocked". So,
-		we return !0, which is true thanks to C's int to bool conversion
-
-		One the other hand, if it was locked, then the lock would return 1.
-		so, !1 = 0 = false.
-		*/		
-		return atomicLock_.test_and_set() == 0;
-	} else {
-		mutex_.lock();
-		return true;
-	}
-};
-
-inline void __releaseAcquiredLock(){
-	if (g_Config.bAtomicAudioLocks){
-		atomicLock_.clear();
-	} else {
-		mutex_.unlock();
-	}
-}
-
-inline void __blockForAudioQueueLock(){
-	if (g_Config.bAtomicAudioLocks){
-		while ((atomicLock_.test_and_set() == 0)){ }
-	} else {
-		mutex_.lock();
-	}
+int __AudioMix(short *outstereo, int numFrames, int sampleRate) {
+	resampler.Mix(outstereo, numFrames, false, sampleRate);  // Returned count of real (non-padding) frames is not used here.
+	return numFrames;  // Always reports the whole request as filled.
 }
--- a/Core/HLE/__sceAudio.h
+++ b/Core/HLE/__sceAudio.h
@ -32,4 +32,4 @@ u32 __AudioEnqueue(AudioChannel &chan, int chanNum, bool blocking);
 void __AudioWakeThreads(AudioChannel &chan, int result, int step);
 void __AudioWakeThreads(AudioChannel &chan, int result);

-int __AudioMix(short *outstereo, int numSamples);
+int __AudioMix(short *outstereo, int numSamples, int sampleRate);
--- a/Core/HW/StereoResampler.cpp
+++ b/Core/HW/StereoResampler.cpp
@ -0,0 +1,172 @@
+// Copyright (c) 2015- PPSSPP Project and Dolphin Project.
+
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, version 2.0 or later versions.
+
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License 2.0 for more details.
+
+// A copy of the GPL 2.0 should have been included with the program.
+// If not, see http://www.gnu.org/licenses/
+
+// Official git repository and contact information can be found at
+// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
+
+// Adapted from Dolphin.
+
+#include <string.h>
+
+#include "base/logging.h"
+#include "Common/ChunkFile.h"
+#include "Common/MathUtil.h"
+#include "Common/Atomics.h"
+#include "Core/HW/StereoResampler.h"
+#include "Globals.h"
+
+#ifdef _M_SSE
+#include <emmintrin.h>
+#endif
+
+inline void ClampBufferToS16(s16 *out, const s32 *in, size_t size) {
+#ifdef _M_SSE
+	// Handles 8 samples per iteration; presumably size is a multiple of 8 (hwBlockSize-derived), any remainder uses the scalar loop.
+	while (size >= 8) {
+		__m128i in1 = _mm_loadu_si128((__m128i *)in);
+		__m128i in2 = _mm_loadu_si128((__m128i *)(in + 4));
+		__m128i packed = _mm_packs_epi32(in1, in2);  // s32 -> s16 pack with signed saturation
+		_mm_storeu_si128((__m128i *)out, packed);
+		out += 8;
+		in += 8;
+		size -= 8;
+	}
+	for (size_t i = 0; i < size; i++) {
+		out[i] = clamp_s16(in[i]);
+	}
+#else
+	for (size_t i = 0; i < size; i++) {
+		out[i] = clamp_s16(in[i]);
+	}
+#endif
+}
+
+void StereoResampler::MixerFifo::Clear() {
+	memset(m_buffer, 0, sizeof(m_buffer));  // Zeroes the sample data only; m_indexR/m_indexW/m_frac are left as they are.
+}
+
+// Executed from sound stream thread. Fills numSamples stereo frames; returns how many came from real data (the rest is padding).
+unsigned int StereoResampler::MixerFifo::Mix(short* samples, unsigned int numSamples, bool consider_framelimit, int sample_rate) {
+	unsigned int currentSample = 0;
+
+	// Cache access in non-volatile variable
+	// This is the only function changing the read value, so it's safe to
+	// cache it locally although it's written here.
+	// The writing pointer will be modified outside, but it will only increase,
+	// so we will just ignore new written data while interpolating.
+	// Without this cache, the compiler wouldn't be allowed to optimize the
+	// interpolation loop.
+	u32 indexR = Common::AtomicLoad(m_indexR);
+	u32 indexW = Common::AtomicLoad(m_indexW);
+
+	// Drift prevention mechanism
+	float numLeft = (float)(((indexW - indexR) & INDEX_MASK) / 2);
+	m_numLeftI = (numLeft + m_numLeftI*(CONTROL_AVG - 1)) / CONTROL_AVG;
+	float offset = (m_numLeftI - LOW_WATERMARK) * CONTROL_FACTOR;
+	if (offset > MAX_FREQ_SHIFT) offset = MAX_FREQ_SHIFT;
+	if (offset < -MAX_FREQ_SHIFT) offset = -MAX_FREQ_SHIFT;
+
+	float aid_sample_rate = m_input_sample_rate + offset;
+	
+	/* Hm?
+	u32 framelimit = SConfig::GetInstance().m_Framelimit;
+	if (consider_framelimit && framelimit > 1) {
+		aid_sample_rate = aid_sample_rate * (framelimit - 1) * 5 / 59.994;
+	}*/
+
+	const u32 ratio = (u32)(65536.0f * aid_sample_rate / (float)sample_rate);
+
+	// TODO: consider a higher-quality resampling algorithm.
+	// TODO: Add a fast path for 1:1.
+	for (; currentSample < numSamples * 2 && ((indexW - indexR) & INDEX_MASK) > 2; currentSample += 2) {
+		u32 indexR2 = indexR + 2; //next sample
+		s16 l1 = m_buffer[indexR & INDEX_MASK]; //current
+		s16 r1 = m_buffer[(indexR + 1) & INDEX_MASK]; //current
+		s16 l2 = m_buffer[indexR2 & INDEX_MASK]; //next
+		s16 r2 = m_buffer[(indexR2 + 1) & INDEX_MASK]; //next
+		int sampleL = ((l1 << 16) + (l2 - l1) * (u16)m_frac) >> 16;
+		int sampleR = ((r1 << 16) + (r2 - r1) * (u16)m_frac) >> 16;
+		samples[currentSample] = clamp_s16(sampleL);  // Do we even need to clamp after interpolation?
+		samples[currentSample + 1] = clamp_s16(sampleR);
+		m_frac += ratio;
+		indexR += 2 * (u16)(m_frac >> 16);
+		m_frac &= 0xffff;
+	}
+
+	int realSamples = currentSample;
+
+	// Padding with the last frame to reduce clicking. Buffer is interleaved L,R so left is at indexR - 2, right at indexR - 1.
+	short s[2];
+	s[0] = clamp_s16(m_buffer[(indexR - 2) & INDEX_MASK]);
+	s[1] = clamp_s16(m_buffer[(indexR - 1) & INDEX_MASK]);
+	for (; currentSample < numSamples * 2; currentSample += 2) {
+		samples[currentSample] = s[0];
+		samples[currentSample + 1] = s[1];
+	}
+
+	// Flush cached variable
+	Common::AtomicStore(m_indexR, indexR);
+
+	//if (realSamples != numSamples * 2) {
+	//	ILOG("Underrun! %i / %i", realSamples / 2, numSamples);
+	//}
+
+	return realSamples / 2;
+}
+
+unsigned int StereoResampler::Mix(short* samples, unsigned int num_samples, bool consider_framelimit, int sample_rate) {
+	if (!samples)
+		return 0;
+
+	return m_dma_mixer.Mix(samples, num_samples, consider_framelimit, sample_rate);
+}
+
+void StereoResampler::MixerFifo::PushSamples(const s32 *samples, unsigned int num_samples) {
+	// Cache access in non-volatile variable
+	// indexR isn't allowed to cache in the audio throttling loop as it
+	// needs to get updates to not deadlock.
+	u32 indexW = Common::AtomicLoad(m_indexW);
+
+	// Check if we have enough free space; if not, the incoming samples are dropped.
+	// indexW == m_indexR results in empty buffer, so indexR must always be smaller than indexW
+	if (num_samples * 2 + ((indexW - Common::AtomicLoad(m_indexR)) & INDEX_MASK) >= MAX_SAMPLES * 2)
+		return;
+
+	// AyuanX: Actual re-sampling work has been moved to sound thread
+	// to alleviate the workload on main thread.
+	// Here we only clamp the 32-bit input to 16 bits, splitting the copy in two when it wraps past the end of m_buffer.
+	int over_bytes = num_samples * 4 - (MAX_SAMPLES * 2 - (indexW & INDEX_MASK)) * sizeof(short);  // s16 bytes that spill past the buffer end
+	if (over_bytes > 0) {
+		ClampBufferToS16(&m_buffer[indexW & INDEX_MASK], samples, (num_samples * 4 - over_bytes) / 2);
+		ClampBufferToS16(&m_buffer[0], samples + (num_samples * 4 - over_bytes) / sizeof(short), over_bytes / 2);
+	} else {
+		ClampBufferToS16(&m_buffer[indexW & INDEX_MASK], samples, num_samples * 2);
+	}
+
+	Common::AtomicAdd(m_indexW, num_samples * 2);  // Advance the write index: two s16 values per stereo frame.
+}
+
+void StereoResampler::PushSamples(const int *samples, unsigned int num_samples) {
+	m_dma_mixer.PushSamples(samples, num_samples);
+}
+
+void StereoResampler::MixerFifo::SetInputSampleRate(unsigned int rate) {
+	m_input_sample_rate = rate;
+}
+
+void StereoResampler::DoState(PointerWrap &p) {
+	auto s = p.Section("resampler", 1);  // Opens the "resampler" section (version 1).
+	if (!s)
+		return;
+}  // No buffer or index state is serialized here; only the section itself is processed.
--- a/Core/HW/StereoResampler.h
+++ b/Core/HW/StereoResampler.h
@ -0,0 +1,91 @@
+// Copyright (c) 2015- PPSSPP Project and Dolphin Project.
+
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, version 2.0 or later versions.
+
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License 2.0 for more details.
+
+// A copy of the GPL 2.0 should have been included with the program.
+// If not, see http://www.gnu.org/licenses/
+
+// Official git repository and contact information can be found at
+// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
+
+// Adapted from Dolphin.
+
+#pragma once
+
+#include <string>
+
+#include "base/mutex.h"
+
+#include "Common/ChunkFile.h"
+#include "Common/CommonTypes.h"
+
+// 16 bit Stereo
+
+#define MAX_SAMPLES     (2*(1024 * 2)) // 2*64ms - had to double it for nVidia Shield which has huge buffers
+#define INDEX_MASK      (MAX_SAMPLES * 2 - 1)
+
+#define LOW_WATERMARK   1280 // 40 ms
+#define MAX_FREQ_SHIFT  200  // per 32000 Hz
+#define CONTROL_FACTOR  0.2f // in freq_shift per fifo size offset
+#define CONTROL_AVG     32
+
+class StereoResampler {
+
+public:
+	StereoResampler()
+		: m_dma_mixer(this, 44100)  // The FIFO starts with an input sample rate of 44100.
+	{
+	}
+
+	virtual ~StereoResampler() {}
+
+	// Called from audio threads. Forwards to the FIFO's Mix(); returns 0 when samples is null.
+	virtual unsigned int Mix(short* samples, unsigned int numSamples, bool consider_framelimit, int sampleRate);
+
+	// Called from main thread
+	// This clamps the samples to 16-bit before starting to work on them.
+	virtual void PushSamples(const s32* samples, unsigned int num_samples);
+
+	void Clear() {
+		m_dma_mixer.Clear();
+	}
+
+	void DoState(PointerWrap &p);
+
+protected:
+	class MixerFifo {
+	public:
+		MixerFifo(StereoResampler *mixer, unsigned sample_rate)
+			: m_mixer(mixer)
+			, m_input_sample_rate(sample_rate)
+			, m_indexW(0)
+			, m_indexR(0)
+			, m_numLeftI(0.0f)
+			, m_frac(0)
+		{
+			memset(m_buffer, 0, sizeof(m_buffer));
+		}
+		void PushSamples(const s32* samples, unsigned int num_samples);
+		unsigned int Mix(short* samples, unsigned int numSamples, bool consider_framelimit, int sample_rate);
+		void SetInputSampleRate(unsigned int rate);
+		void Clear();
+
+	private:
+		StereoResampler *m_mixer;  // Back-pointer to the StereoResampler passed to the constructor.
+		unsigned m_input_sample_rate;  // Nominal input rate; Mix() adds a bounded drift-correction offset to it.
+		short m_buffer[MAX_SAMPLES * 2];  // Ring buffer of interleaved 16-bit stereo samples (left, right, ...).
+		volatile u32 m_indexW;  // Write index in s16 units; advanced by PushSamples().
+		volatile u32 m_indexR;  // Read index in s16 units; stored back by Mix().
+		float m_numLeftI;  // Running average of queued frames, used by Mix() for drift prevention.
+		u32 m_frac;  // 16-bit fractional read position carried between Mix() calls.
+	};
+
+	MixerFifo m_dma_mixer;
+};
--- a/UI/GameSettingsScreen.cpp
+++ b/UI/GameSettingsScreen.cpp
@ -428,8 +428,6 @@ void GameSettingsScreen::CreateViews() {
 #endif
 	systemSettings->Add(new CheckBox(&g_Config.bSetRoundingMode, s->T("Respect FPU rounding (disable for old GEB saves)")))->OnClick.Handle(this, &GameSettingsScreen::OnJitAffectingSetting);

-	systemSettings->Add(new CheckBox(&g_Config.bAtomicAudioLocks, s->T("Atomic Audio locks (experimental)")))->SetEnabled(!PSP_IsInited());
-
 	systemSettings->Add(new ItemHeader(s->T("Developer Tools")));
 	systemSettings->Add(new Choice(s->T("Developer Tools")))->OnClick.Handle(this, &GameSettingsScreen::OnDeveloperTools);

--- a/UI/NativeApp.cpp
+++ b/UI/NativeApp.cpp
@ -142,6 +142,10 @@ static std::vector<PendingMessage> pendingMessages;
 static Thin3DContext *thin3d;
 static UIContext *uiContext;

+#ifdef _WIN32
+WindowsAudioBackend *winAudioBackend;
+#endif
+
 Thin3DContext *GetThin3D() {
 	return thin3d;
 }
@ -219,20 +223,23 @@ std::string NativeQueryConfig(std::string query) {

 		sprintf(temp, "%i", scale);
 		return std::string(temp);
+	} else if (query == "force44khz") {
+		return std::string("0");
 	} else {
-		return std::string("");
+		return "";
 	}
 }

 int NativeMix(short *audio, int num_samples) {
 	if (GetUIState() == UISTATE_INGAME) {
-		num_samples = __AudioMix(audio, num_samples);
+		int sample_rate = System_GetPropertyInt(SYSPROP_AUDIO_SAMPLE_RATE);
+		num_samples = __AudioMix(audio, num_samples, sample_rate > 0 ? sample_rate : 44100);
 	}	else {
 		MixBackgroundAudio(audio, num_samples);
 	}

 #ifdef _WIN32
-	DSound_UpdateSound();
+	winAudioBackend->Update();
 #endif

 	return num_samples;
@ -568,13 +575,15 @@ void NativeInitGraphics() {
 	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);

 #ifdef _WIN32
-	DSound_StartSound(MainWindow::GetHWND(), &Win32Mix, 44100);
+	winAudioBackend = CreateAudioBackend(AUDIO_BACKEND_AUTO);
+	winAudioBackend->Init(MainWindow::GetHWND(), &Win32Mix, 44100);
 #endif
 }

 void NativeShutdownGraphics() {
 #ifdef _WIN32
-	DSound_StopSound();
+	delete winAudioBackend;
+	winAudioBackend = NULL;
 #endif

 	screenManager->deviceLost();
--- a/Windows/DSoundStream.cpp
+++ b/Windows/DSoundStream.cpp
@ -8,97 +8,89 @@
 #define BUFSIZE 0x4000
 #define MAXWAIT 20   //ms

-class DSoundState {
+class DSoundAudioBackend : public WindowsAudioBackend {
 public:
-	DSoundState(HWND window, StreamCallback _callback, int sampleRate);
-	bool Init();  // If fails, can safely delete the object
+	DSoundAudioBackend();
+	~DSoundAudioBackend() override;

-	bool createBuffer();
-	bool writeDataToBuffer(DWORD dwOffset, // Our own write cursor.
-		char* soundData, // Start of our data.
-		DWORD dwSoundBytes); // Size of block to copy.
-
-	inline int ModBufferSize(int x) { return (x + bufferSize) % bufferSize; }
-	int RunThread();
-	void UpdateSound();
-	void StopSound();
-	int GetCurSample();
-	int GetSampleRate() { return sampleRate; }
+	bool Init(HWND window, StreamCallback callback, int sampleRate) override;  // If fails, can safely delete the object
+	void Update() override;
+	int GetSampleRate() override { return sampleRate_; }

 private:
+	inline int ModBufferSize(int x) { return (x + bufferSize_) % bufferSize_; }
+	int RunThread();
+	static unsigned int WINAPI soundThread(void *param);
+	bool CreateBuffer();
+	bool WriteDataToBuffer(DWORD offset, // Our own write cursor.
+		char* soundData, // Start of our data.
+		DWORD soundBytes); // Size of block to copy.
+
 	CRITICAL_SECTION soundCriticalSection;
 	HWND window_;
-	HANDLE soundSyncEvent = NULL;
-	HANDLE hThread = NULL;
+	HANDLE soundSyncEvent_ = NULL;
+	HANDLE hThread_ = NULL;

-	StreamCallback callback;
+	StreamCallback callback_;

-	IDirectSound8 *ds = NULL;
-	IDirectSoundBuffer *dsBuffer = NULL;
+	IDirectSound8 *ds_ = NULL;
+	IDirectSoundBuffer *dsBuffer_ = NULL;

-	int bufferSize; // bytes
-	int totalRenderedBytes;
-	int sampleRate;
+	int bufferSize_; // bytes
+	int totalRenderedBytes_;
+	int sampleRate_;

-	volatile int threadData;
+	volatile int threadData_;

-	int currentPos;
-	int lastPos;
-	short realtimeBuffer[BUFSIZE * 2];
+	int currentPos_;
+	int lastPos_;
+	short realtimeBuffer_[BUFSIZE * 2];
 };

 // TODO: Get rid of this
-static DSoundState *g_dsound;
+static DSoundAudioBackend *g_dsound;

 inline int RoundDown128(int x) {
 	return x & (~127);
 }

-int DSound_GetSampleRate() {
-	if (g_dsound) {
-		return g_dsound->GetSampleRate();
-	} else {
-		return 0;
-	}
-}
-
-bool DSoundState::createBuffer() {
+bool DSoundAudioBackend::CreateBuffer() {
 	PCMWAVEFORMAT pcmwf;
 	DSBUFFERDESC dsbdesc;

 	memset(&pcmwf, 0, sizeof(PCMWAVEFORMAT));
 	memset(&dsbdesc, 0, sizeof(DSBUFFERDESC));

-	bufferSize = BUFSIZE;
+	bufferSize_ = BUFSIZE;

 	pcmwf.wf.wFormatTag = WAVE_FORMAT_PCM;
 	pcmwf.wf.nChannels = 2;
-	pcmwf.wf.nSamplesPerSec = sampleRate;
+	pcmwf.wf.nSamplesPerSec = sampleRate_;
 	pcmwf.wf.nBlockAlign = 4;
 	pcmwf.wf.nAvgBytesPerSec = pcmwf.wf.nSamplesPerSec * pcmwf.wf.nBlockAlign;
 	pcmwf.wBitsPerSample = 16;

 	dsbdesc.dwSize = sizeof(DSBUFFERDESC);
 	dsbdesc.dwFlags = DSBCAPS_GETCURRENTPOSITION2 | DSBCAPS_GLOBALFOCUS; // //DSBCAPS_CTRLPAN | DSBCAPS_CTRLVOLUME | DSBCAPS_CTRLFREQUENCY; 
-	dsbdesc.dwBufferBytes = bufferSize;  //FIX32(pcmwf.wf.nAvgBytesPerSec);   //change to set buffer size
+	dsbdesc.dwBufferBytes = bufferSize_;  //FIX32(pcmwf.wf.nAvgBytesPerSec);   //change to set buffer size
 	dsbdesc.lpwfxFormat = (WAVEFORMATEX *)&pcmwf;

-	if (SUCCEEDED(ds->CreateSoundBuffer(&dsbdesc, &dsBuffer, NULL))) {
-		dsBuffer->SetCurrentPosition(0);
+	if (SUCCEEDED(ds_->CreateSoundBuffer(&dsbdesc, &dsBuffer_, NULL))) {
+		dsBuffer_->SetCurrentPosition(0);
 		return true;
 	} else {
-		dsBuffer = NULL;
+		dsBuffer_ = NULL;
 		return false;
 	}
 }

-bool DSoundState::writeDataToBuffer(DWORD dwOffset, // Our own write cursor.
+bool DSoundAudioBackend::WriteDataToBuffer(DWORD offset, // Our own write cursor.
 																		char* soundData, // Start of our data.
-																		DWORD dwSoundBytes) { // Size of block to copy.
+																		DWORD soundBytes) { // Size of block to copy.
 	void *ptr1, *ptr2;
 	DWORD numBytes1, numBytes2;
 	// Obtain memory address of write block. This will be in two parts if the block wraps around.
-	HRESULT hr = dsBuffer->Lock(dwOffset, dwSoundBytes, &ptr1, &numBytes1, &ptr2, &numBytes2, 0);
+	HRESULT hr = dsBuffer_->Lock(offset, soundBytes, &ptr1, &numBytes1, &ptr2, &numBytes2, 0);

 	// If the buffer was lost, restore and retry lock.
 	/*
@ -112,7 +104,7 @@ bool DSoundState::writeDataToBuffer(DWORD dwOffset, // Our own write cursor.
 			memcpy(ptr2, soundData+numBytes1, numBytes2);

 		// Release the data back to DirectSound.
-		dsBuffer->Unlock(ptr1, numBytes1, ptr2, numBytes2);
+		dsBuffer_->Unlock(ptr1, numBytes1, ptr2, numBytes2);
 		return true;
 	}/* 
 		else
@ -124,156 +116,122 @@ bool DSoundState::writeDataToBuffer(DWORD dwOffset, // Our own write cursor.
 	return false;
 }

-unsigned int WINAPI soundThread(void *param) {
-	DSoundState *state = (DSoundState *)param;
+unsigned int WINAPI DSoundAudioBackend::soundThread(void *param) {
+	DSoundAudioBackend *state = (DSoundAudioBackend *)param;
 	return state->RunThread();
 }

-int DSoundState::RunThread() {
+int DSoundAudioBackend::RunThread() {
 	setCurrentThreadName("DSound");
-	currentPos = 0;
-	lastPos = 0;
+	currentPos_ = 0;
+	lastPos_ = 0;
 	//writeDataToBuffer(0,realtimeBuffer,bufferSize);
 	//  dsBuffer->Lock(0, bufferSize, (void **)&p1, &num1, (void **)&p2, &num2, 0); 

-	dsBuffer->Play(0,0,DSBPLAY_LOOPING);
+	dsBuffer_->Play(0,0,DSBPLAY_LOOPING);

-	while (!threadData) {
+	while (!threadData_) {
 		EnterCriticalSection(&soundCriticalSection);

-		dsBuffer->GetCurrentPosition((DWORD *)&currentPos, 0);
-		int numBytesToRender = RoundDown128(ModBufferSize(currentPos - lastPos)); 
+		dsBuffer_->GetCurrentPosition((DWORD *)&currentPos_, 0);
+		int numBytesToRender = RoundDown128(ModBufferSize(currentPos_ - lastPos_)); 

 		if (numBytesToRender >= 256) {
-			int numBytesRendered = 4 * (*callback)(realtimeBuffer, numBytesToRender >> 2, 16, 44100, 2);
+			int numBytesRendered = 4 * (*callback_)(realtimeBuffer_, numBytesToRender >> 2, 16, 44100, 2);
 			//We need to copy the full buffer, regardless of what the mixer claims to have filled
 			//If we don't do this then the sound will loop if the sound stops and the mixer writes only zeroes
 			numBytesRendered = numBytesToRender;
-			writeDataToBuffer(lastPos, (char *) realtimeBuffer, numBytesRendered);
+			WriteDataToBuffer(lastPos_, (char *) realtimeBuffer_, numBytesRendered);

-			currentPos = ModBufferSize(lastPos + numBytesRendered);
-			totalRenderedBytes += numBytesRendered;
+			currentPos_ = ModBufferSize(lastPos_ + numBytesRendered);
+			totalRenderedBytes_ += numBytesRendered;

-			lastPos = currentPos;
+			lastPos_ = currentPos_;
 		}

 		LeaveCriticalSection(&soundCriticalSection);
-		WaitForSingleObject(soundSyncEvent, MAXWAIT);
+		WaitForSingleObject(soundSyncEvent_, MAXWAIT);
 	}
-	dsBuffer->Stop();
+	dsBuffer_->Stop();

-	threadData = 2;
+	threadData_ = 2;
 	return 0;
 }

-DSoundState::DSoundState(HWND window, StreamCallback _callback, int sampleRate)
-	: window_(window), callback(_callback), sampleRate(sampleRate) {
-
-	callback = _callback;
-	threadData=0;
+DSoundAudioBackend::DSoundAudioBackend() : threadData_(0), ds_(nullptr) {
 }

-bool DSoundState::Init() {
-	soundSyncEvent = CreateEvent(0, false, false, 0);
-	InitializeCriticalSection(&soundCriticalSection);
+DSoundAudioBackend::~DSoundAudioBackend() {
+	if (!ds_)
+		return;

-	if (FAILED(DirectSoundCreate8(0,&ds,0))) {
-		CloseHandle(soundSyncEvent);
-		DeleteCriticalSection(&soundCriticalSection);
-		return false;
-	}
-
-	ds->SetCooperativeLevel(window_, DSSCL_PRIORITY);
-	if (!createBuffer())
-		return false;
-
-	DWORD num1;
-	short *p1; 
-
-	dsBuffer->Lock(0, bufferSize, (void **)&p1, &num1, 0, 0, 0); 
-
-	memset(p1,0,num1);
-	dsBuffer->Unlock(p1,num1,0,0);
-	totalRenderedBytes = -bufferSize;
-	hThread = (HANDLE)_beginthreadex(0, 0, soundThread, (void *)this, 0, 0);
-	SetThreadPriority(hThread, THREAD_PRIORITY_ABOVE_NORMAL);
-	return true;
-}
-
-void DSoundState::UpdateSound() {
-	if (soundSyncEvent != NULL)
-		SetEvent(soundSyncEvent);
-}
-
-
-void DSoundState::StopSound() {
-	if (!dsBuffer)
+	if (!dsBuffer_)
 		return;

 	EnterCriticalSection(&soundCriticalSection);

-	if (threadData == 0) {
-		threadData = 1;
+	if (threadData_ == 0) {
+		threadData_ = 1;
 	}

-	if (hThread != NULL) {
-		WaitForSingleObject(hThread, 1000);
-		CloseHandle(hThread);
-		hThread = NULL;
+	if (hThread_ != NULL) {
+		WaitForSingleObject(hThread_, 1000);
+		CloseHandle(hThread_);
+		hThread_ = NULL;
 	}

-	if (threadData == 2) {
-		if (dsBuffer != NULL)
-			dsBuffer->Release();
-		dsBuffer = NULL;
-		if (ds != NULL)
-			ds->Release();
-		ds = NULL;
+	if (threadData_ == 2) {
+		if (dsBuffer_ != NULL)
+			dsBuffer_->Release();
+		dsBuffer_ = NULL;
+		if (ds_ != NULL)
+			ds_->Release();
+		ds_ = NULL;
 	}

-	if (soundSyncEvent != NULL) {
-		CloseHandle(soundSyncEvent);
+	if (soundSyncEvent_ != NULL) {
+		CloseHandle(soundSyncEvent_);
 	}
-	soundSyncEvent = NULL;
+	soundSyncEvent_ = NULL;
 	LeaveCriticalSection(&soundCriticalSection);
 	DeleteCriticalSection(&soundCriticalSection);
 }

-int DSoundState::GetCurSample() {
-	EnterCriticalSection(&soundCriticalSection);
-	int playCursor;
-	dsBuffer->GetCurrentPosition((DWORD *)&playCursor,0);
-	playCursor = ModBufferSize(playCursor-lastPos)+totalRenderedBytes;
-	LeaveCriticalSection(&soundCriticalSection);
-	return playCursor;
-}
-
-void DSound_UpdateSound() {
-	if (g_dsound) {
-		g_dsound->UpdateSound();
-	}
-}
-
-bool DSound_StartSound(HWND window, StreamCallback _callback, int sampleRate) {
-	g_dsound = new DSoundState(window, _callback, sampleRate);
-	if (!g_dsound->Init()) {
-		delete g_dsound;
-		g_dsound = NULL;
+bool DSoundAudioBackend::Init(HWND window, StreamCallback _callback, int sampleRate) {
+	window_ = window;
+	callback_ = _callback;
+	sampleRate_ = sampleRate;
+	threadData_ = 0;
+	if (FAILED(DirectSoundCreate8(0, &ds_, 0))) {
+		ds_ = NULL;
 		return false;
 	}
+
+	ds_->SetCooperativeLevel(window_, DSSCL_PRIORITY);
+	if (!CreateBuffer())
+		return false;
+
+	soundSyncEvent_ = CreateEvent(0, false, false, 0);
+	InitializeCriticalSection(&soundCriticalSection);
+
+	DWORD num1;
+	short *p1; 
+
+	dsBuffer_->Lock(0, bufferSize_, (void **)&p1, &num1, 0, 0, 0); 
+
+	memset(p1,0,num1);
+	dsBuffer_->Unlock(p1,num1,0,0);
+	totalRenderedBytes_ = -bufferSize_;
+	hThread_ = (HANDLE)_beginthreadex(0, 0, soundThread, (void *)this, 0, 0);
+	SetThreadPriority(hThread_, THREAD_PRIORITY_ABOVE_NORMAL);
 	return true;
 }

-void DSound_StopSound() {
-	g_dsound->StopSound();
-	delete g_dsound;
-	g_dsound = NULL;
+void DSoundAudioBackend::Update() {
+	if (soundSyncEvent_ != NULL)
+		SetEvent(soundSyncEvent_);
 }

-int DSound_GetCurSample()	{
-	return g_dsound->GetCurSample();
-}
-
-float DSound_GetTimer() {
-	return (float)g_dsound->GetCurSample()*(1.0f/(4.0f*44100.0f));
-}
+WindowsAudioBackend *CreateAudioBackend(AudioBackendType type) {
+	return new DSoundAudioBackend();
+}
--- a/Windows/DSoundStream.h
+++ b/Windows/DSoundStream.h
@ -1,16 +1,23 @@
-#ifndef __SOUNDSTREAM_H__
-#define __SOUNDSTREAM_H__
+#pragma once

 #include "Common/CommonWindows.h"

 typedef int (*StreamCallback)(short *buffer, int numSamples, int bits, int rate, int channels);

-bool DSound_StartSound(HWND window, StreamCallback _callback, int sampleRate);
-void DSound_UpdateSound();
-void DSound_StopSound();
+class WindowsAudioBackend {
+public:
+	WindowsAudioBackend() {}
+	virtual ~WindowsAudioBackend() {}
+	virtual bool Init(HWND window, StreamCallback _callback, int sampleRate) = 0;
+	virtual void Update() {}  // Doesn't have to do anything
+	virtual int GetSampleRate() = 0;
+};

-float DSound_GetTimer();
-int DSound_GetCurSample();
-int DSound_GetSampleRate();
- 
-#endif //__SOUNDSTREAM_H__
+enum AudioBackendType {
+	AUDIO_BACKEND_DSOUND,
+	// AUDIO_BACKEND_WASAPI,   // TODO
+	AUDIO_BACKEND_AUTO
+};
+
+// Factory
+WindowsAudioBackend *CreateAudioBackend(AudioBackendType type);
--- a/Windows/WindowsHost.cpp
+++ b/Windows/WindowsHost.cpp
@ -126,9 +126,13 @@ void WindowsHost::InitSound()
 {
 }

+// UGLY!
+extern WindowsAudioBackend *winAudioBackend;
+
 void WindowsHost::UpdateSound()
 {
-	DSound_UpdateSound();
+	if (winAudioBackend)
+		winAudioBackend->Update();
 }

 void WindowsHost::ShutdownSound()
--- a/Windows/main.cpp
+++ b/Windows/main.cpp
@ -256,10 +256,13 @@ std::string System_GetProperty(SystemProperty prop) {
 	}
 }

+// Ugly!
+extern WindowsAudioBackend *winAudioBackend;
+
 int System_GetPropertyInt(SystemProperty prop) {
 	switch (prop) {
 	case SYSPROP_AUDIO_SAMPLE_RATE:
-		return DSound_GetSampleRate();
+		return winAudioBackend ? winAudioBackend->GetSampleRate() : -1;
 	default:
 		return -1;
 	}
--- a/android/jni/Android.mk
+++ b/android/jni/Android.mk
@ -187,6 +187,7 @@ EXEC_AND_LIB_FILES := \
  $(SRC)/Core/HW/MpegDemux.cpp.arm \
  $(SRC)/Core/HW/MediaEngine.cpp.arm \
  $(SRC)/Core/HW/SasAudio.cpp.arm \
+  $(SRC)/Core/HW/StereoResampler.cpp.arm \
  $(SRC)/Core/Core.cpp \
  $(SRC)/Core/Config.cpp \
  $(SRC)/Core/CoreTiming.cpp \
--- a/ios/main.mm
+++ b/ios/main.mm
@ -22,7 +22,14 @@ std::string System_GetProperty(SystemProperty prop) {
 	}
 }

-int System_GetPropertyInt(SystemProperty prop) { return -1; }
+int System_GetPropertyInt(SystemProperty prop) {
+  switch (prop) {
+  case SYSPROP_AUDIO_SAMPLE_RATE:
+    return 44100;
+  default:
+    return -1;
+  }
+}

 void System_SendMessage(const char *command, const char *parameter) {
 	if (!strcmp(command, "finish")) {
--- a/2
+++ b/2
@ -1 +1 @@
-Subproject commit 1b5bf730caffa702de34a6db9f0ae93fbbd88eed
+Subproject commit 84ed01df53cd557eb126c1f6badef7d9201add5f