MTVU: use acquire/release semantics for atomic operations

* Avoid generating a full memory barrier (mfence) on every atomic access
* Based on the fact that the previous code worked without any barrier

v2:
* keep the basic code in the reset path
* use relaxed accesses for isBusy. The variable doesn't carry any
  load/store dependency; it is only a hint to optimize the semaphore post
Gregory Hainaut 2016-07-02 18:56:21 +02:00
parent 3b4c357aaa
commit ca46921796
3 changed files with 11 additions and 11 deletions
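
For context, a minimal standalone sketch (illustrative only, not part of the PCSX2 tree; names are made up) of the property this commit relies on: on x86 a default, sequentially consistent atomic store is typically compiled to an xchg or a mov followed by mfence, while a release store compiles to a plain mov with no fence.

    // sketch.cpp -- illustrative only, not PCSX2 code
    #include <atomic>

    std::atomic<int> pos{0};

    void store_default(int v)
    {
        // Defaults to std::memory_order_seq_cst: on x86 this usually emits
        // an xchg (or mov + mfence), the barrier the commit wants to avoid.
        pos.store(v);
    }

    void store_release(int v)
    {
        // Release ordering is enough to publish a ring-buffer index:
        // on x86 it compiles to a plain mov, no fence.
        pos.store(v, std::memory_order_release);
    }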


@@ -399,17 +399,17 @@ namespace Threading
     ScopedLockBool(Mutex& mutexToLock, std::atomic<bool>& isLockedBool)
         : m_lock(mutexToLock),
           m_bool(isLockedBool) {
-        m_bool = m_lock.IsLocked();
+        m_bool.store(m_lock.IsLocked(), std::memory_order_relaxed);
     }
     virtual ~ScopedLockBool() throw() {
-        m_bool = false;
+        m_bool.store(false, std::memory_order_relaxed);
     }
     void Acquire() {
         m_lock.Acquire();
-        m_bool = m_lock.IsLocked();
+        m_bool.store(m_lock.IsLocked(), std::memory_order_relaxed);
     }
     void Release() {
-        m_bool = false;
+        m_bool.store(false, std::memory_order_relaxed);
     }
 };
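
The relaxed stores above are viable because ScopedLockBool only mirrors the mutex state: locking and unlocking m_lock already provides the acquire/release ordering. A minimal sketch of the same pattern, using std::mutex and hypothetical names rather than the pxThread types:

    #include <atomic>
    #include <mutex>

    // Sketch of the "flag mirrors a mutex" pattern (names are hypothetical).
    std::mutex        mtx;
    std::atomic<bool> locked{false};

    void enter()
    {
        mtx.lock();
        // The mutex acquire above already orders what follows,
        // so the flag itself only needs a relaxed store.
        locked.store(true, std::memory_order_relaxed);
    }

    void leave()
    {
        locked.store(false, std::memory_order_relaxed);
        // The mutex release below publishes the store to other threads.
        mtx.unlock();
    }

    bool looks_locked()
    {
        // Readers treat the flag as a hint only, so a relaxed load suffices.
        return locked.load(std::memory_order_relaxed);
    }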


@@ -75,11 +75,11 @@ void VU_Thread::Reset()
 {
     ScopedLock lock(mtxBusy);

-    read_pos = 0;
     write_pos = 0;
     write_offset = 0;
     vuCycleIdx = 0;
-    isBusy = false;
+    read_pos = 0;
+    isBusy = false;
     memzero(vif);
     memzero(vifRegs);
     memzero(vuCycles);
@@ -202,7 +202,7 @@ __fi u32* VU_Thread::GetWritePtr()
 __fi void VU_Thread::incReadPos(s32 offset)
 { // Offset in u32 sizes
-    read_pos = (read_pos + offset) & buffer_mask;
+    read_pos.store((read_pos.load(std::memory_order_relaxed) + offset) & buffer_mask, std::memory_order_release);
 }

 __fi void VU_Thread::incWritePos()
 { // Adds write_offset
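
The release store in incReadPos is meant to pair with an acquire load of read_pos on the other thread (the matching load is not shown in this diff). A minimal sketch of that pairing, with hypothetical names, is:

    #include <atomic>
    #include <cstdint>

    // Sketch of the assumed producer/consumer pairing (hypothetical names):
    // the consumer publishes how far it has read with a release store, and
    // the producer reads that index with an acquire load before reusing space.
    static const uint32_t mask = 0xFFF;
    std::atomic<uint32_t> read_idx{0};
    uint32_t data[mask + 1];

    void consumer_advance(uint32_t count)   // consumer thread
    {
        uint32_t pos = read_idx.load(std::memory_order_relaxed); // only this thread writes it
        // ... consume data[pos .. pos + count - 1] ...
        read_idx.store((pos + count) & mask, std::memory_order_release);
    }

    uint32_t producer_read_pos()            // producer thread
    {
        // Acquire pairs with the release above: once the new index is visible,
        // the consumer's reads of the buffer are known to be finished.
        return read_idx.load(std::memory_order_acquire);
    }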
@@ -272,12 +272,12 @@ u32 VU_Thread::Get_vuCycles()
 void VU_Thread::KickStart(bool forceKick)
 {
     if ((forceKick && !semaEvent.Count())
-        || (!isBusy && GetReadPos() != write_pos)) semaEvent.Post();
+        || (!isBusy.load(std::memory_order_relaxed) && GetReadPos() != write_pos)) semaEvent.Post();
 }

 bool VU_Thread::IsDone()
 {
-    return !isBusy && GetReadPos() == GetWritePos();
+    return !isBusy.load(std::memory_order_relaxed) && GetReadPos() == GetWritePos();
 }

 void VU_Thread::WaitVU()


@@ -30,8 +30,8 @@ class VU_Thread : public pxThread {
     static const s32 buffer_size = (_1mb * 16) / sizeof(s32);
     static const u32 buffer_mask = buffer_size - 1;
     __aligned(4) u32 buffer[buffer_size];
-    __aligned(4) std::atomic<int> read_pos; // Only modified by VU thread
-    __aligned(4) std::atomic<bool> isBusy; // Is thread processing data?
+    std::atomic<int> read_pos; // Only modified by VU thread
+    std::atomic<bool> isBusy; // Is thread processing data?
     __aligned(4) s32 write_pos; // Only modified by EE thread
     __aligned(4) s32 write_offset; // Only modified by EE thread
     __aligned(4) Mutex mtxBusy;
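
Dropping __aligned(4) on the two members should be harmless on the x86/x86-64 targets PCSX2 builds for, since std::atomic already keeps at least the natural alignment of the wrapped type. A small hedged check, assuming those targets:

    #include <atomic>

    // Assumption: x86/x86-64 targets, where int has 4-byte natural alignment.
    // std::atomic<int> keeps at least that alignment, so the explicit
    // __aligned(4) attribute is redundant for these members.
    static_assert(alignof(std::atomic<int>) >= alignof(int),
                  "std::atomic<int> keeps at least int's natural alignment");

    int main()
    {
        std::atomic<bool> b{false};
        return b.is_lock_free() ? 0 : 1; // expected lock-free on these targets
    }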