gsdx: remove aggressive threading

http://wiki.pcsx2.net/index.php/PCSX2_Documentation/Threading_Basics
This commit is contained in:
Gregory Hainaut 2015-11-20 14:56:29 +01:00
parent a46204ef9e
commit 19c9a0b441
5 changed files with 4 additions and 206 deletions

View File

@ -341,7 +341,6 @@ void populate_sw_table(GtkWidget* sw_table)
GtkWidget* aa_check = CreateCheckBox("Edge anti-aliasing (AA1)", "aa1");
GtkWidget* mipmap_check = CreateCheckBox("Mipmap", "mipmap", true);
GtkWidget* spin_thread_check= CreateCheckBox("Disable thread sleeping (6+ cores CPU)", "spin_thread");
AddTooltip(aa_check, IDC_AA1);
AddTooltip(mipmap_check, IDC_MIPMAP);
@ -350,7 +349,6 @@ void populate_sw_table(GtkWidget* sw_table)
s_table_line = 0;
InsertWidgetInTable(sw_table , threads_label , threads_spin);
InsertWidgetInTable(sw_table , aa_check, mipmap_check);
InsertWidgetInTable(sw_table , spin_thread_check , spin_thread_check);
}
void populate_shader_table(GtkWidget* shader_table)

View File

@ -1232,27 +1232,3 @@ void GSRasterizerList::GSWorker::Process(shared_ptr<GSRasterizerData>& item)
{
m_r->Draw(item.get());
}
// GSRasterizerList::GSWorkerSpin
// Builds a spinning worker around rasterizer `r`. The pointer is stored as-is
// and is released by this class's destructor.
GSRasterizerList::GSWorkerSpin::GSWorkerSpin(GSRasterizer* r)
	: GSJobQueueSpin<shared_ptr<GSRasterizerData>, 256>(), m_r(r)
{
}
// Lets the queue drain first, then destroys the rasterizer the queued items
// are drawn with.
GSRasterizerList::GSWorkerSpin::~GSWorkerSpin()
{
	this->Wait(); // returns once the queue reports no pending items

	delete m_r;
}
// Forwards to the wrapped rasterizer's GetPixels(); `reset` is passed through
// unchanged.
int GSRasterizerList::GSWorkerSpin::GetPixels(bool reset)
{
	const int pixels = m_r->GetPixels(reset);

	return pixels;
}
// Queue callback: hands one queued item to the wrapped rasterizer's Draw().
// Only the raw pointer is passed on; ownership stays with the shared_ptr.
void GSRasterizerList::GSWorkerSpin::Process(shared_ptr<GSRasterizerData>& item)
{
	GSRasterizerData* const data = item.get();

	m_r->Draw(data);
}

View File

@ -195,23 +195,8 @@ protected:
void Process(shared_ptr<GSRasterizerData>& item);
};
// Worker that feeds a GSRasterizer from the spinning job queue
// (GSJobQueueSpin instantiated with a CAPACITY of 256).
class GSWorkerSpin : public GSJobQueueSpin<shared_ptr<GSRasterizerData>, 256>
{
private:
	// Rasterizer the queued items are forwarded to; deleted in the destructor.
	GSRasterizer* m_r;

public:
	GSWorkerSpin(GSRasterizer* r);
	virtual ~GSWorkerSpin();

	// Pixel count as reported by the wrapped rasterizer.
	int GetPixels(bool reset);

	// GSJobQueue
	void Process(shared_ptr<GSRasterizerData>& item);
};
GSPerfMon* m_perfmon;
vector<IGSJobQueue<shared_ptr<GSRasterizerData> > *> m_workers;
vector<GSWorker*> m_workers;
uint8* m_scanline;
GSRasterizerList(int threads, GSPerfMon* perfmon);
@ -219,7 +204,7 @@ protected:
public:
virtual ~GSRasterizerList();
template<class DS> static IRasterizer* Create(int threads, GSPerfMon* perfmon, bool spin_thread = false)
template<class DS> static IRasterizer* Create(int threads, GSPerfMon* perfmon)
{
threads = std::max<int>(threads, 0);
@ -233,10 +218,7 @@ public:
for(int i = 0; i < threads; i++)
{
if (spin_thread)
rl->m_workers.push_back(new GSWorkerSpin(new GSRasterizer(new DS(), i, threads, perfmon)));
else
rl->m_workers.push_back(new GSWorker(new GSRasterizer(new DS(), i, threads, perfmon)));
rl->m_workers.push_back(new GSWorker(new GSRasterizer(new DS(), i, threads, perfmon)));
}
return rl;

View File

@ -41,8 +41,7 @@ GSRendererSW::GSRendererSW(int threads)
memset(m_texture, 0, sizeof(m_texture));
bool spin_thread = !!theApp.GetConfig("spin_thread", 0);
m_rl = GSRasterizerList::Create<GSDrawScanline>(threads, &m_perfmon, spin_thread);
m_rl = GSRasterizerList::Create<GSDrawScanline>(threads, &m_perfmon);
m_output = (uint8*)_aligned_malloc(1024 * 1024 * sizeof(uint32), 32);

View File

@ -82,7 +82,6 @@ public:
#endif
// Common interface that allows switching between queue implementations dynamically
template<class T> class IGSJobQueue : public GSThread
{
public:
@ -97,9 +96,6 @@ public:
virtual int GetPixels(bool reset) = 0;
};
// This queue doesn't reserve any thread, which makes it a better fit for 2-core/4-core CPUs.
// pros: no hard limit on the number of threads
// cons: lower performance per thread
template<class T, int CAPACITY> class GSJobQueue : public IGSJobQueue<T>
{
protected:
@ -187,156 +183,3 @@ public:
this->Process(item);
}
};
// Spinning job queue: the worker thread never sleeps, it polls for work and
// yields while idle. This queue reserves 'only' the RENDERING threads; if the
// CPU is fast enough, performance is mostly the same as a queue that reserves
// nothing.
// pros: nearly the best fps per thread
// cons: requires (1 + eThreads) cores for GS emulation alone! Reserved for 6/8-core CPUs.
// Note: I'm not sure of the source of the speedup
// 1/ It could be related to less MT logic (lock, cond var)
// 2/ But I highly suspect that waking up a thread is rather slow. My guess
// is that low-power features (like C-states) increase latency. In this case
// the gain will be smaller if PCSX2 is running or on a CPU with few cores (<=4)
template<class T, int CAPACITY> class GSJobQueueSpin : public IGSJobQueue<T>
{
protected:
// Count of pending items: incremented by Push(), decremented by the worker
// once a batch has been processed.
std::atomic<int16_t> m_count;
// Set by the destructor; the worker checks it only while it has no work.
std::atomic<bool> m_exit;
// Storage for the queued items, sized by CAPACITY.
ringbuffer_base<T, CAPACITY> m_queue;
// m_lock guards the decrement of m_count against Wait(); m_empty is
// signalled when the worker sees the count drop to zero (or below).
std::mutex m_lock;
std::condition_variable m_empty;
// Worker thread body: spin until work shows up, process a batch, publish the
// new count, and wake a waiter when nothing is left.
void ThreadProc() {
// Lock is created unlocked and only taken around the count update below.
std::unique_lock<std::mutex> l(m_lock, defer_lock);
while (true) {
// Idle loop: no sleeping, just yield the time slice until an item is
// counted or exit is requested.
while (m_count == 0) {
if (m_exit.load(memory_order_acquire)) return;
std::this_thread::yield();
}
int16_t consumed = 0;
// Each consume_one(*this) invokes operator() on this object, which
// forwards the item to Process().
// NOTE(review): the bound is inclusive (nb >= 0), so this attempts one more
// consume than the m_count snapshot; presumably that is why the check below
// is '<= 0' rather than '== 0' - confirm whether the extra attempt is intended.
for (int16_t nb = m_count; nb >= 0; nb--) {
if (m_queue.consume_one(*this))
consumed++;
}
// Update the count under the lock so Wait() cannot miss the change between
// its check of m_count and its call to m_empty.wait().
l.lock();
m_count -= consumed;
l.unlock();
if (m_count <= 0)
m_empty.notify_one();
}
}
public:
// Starts with an empty queue and immediately launches the worker thread.
GSJobQueueSpin() :
m_count(0),
m_exit(false)
{
this->CreateThread();
};
// Requests the worker to exit, then closes the thread. The worker only looks
// at m_exit while m_count == 0.
virtual ~GSJobQueueSpin() {
m_exit.store(true, memory_order_release);
this->CloseThread();
}
// True when no pushed item is still pending.
bool IsEmpty() const {
ASSERT(m_count >= 0);
return m_count == 0;
}
// Enqueues a copy of `item`. If the ring buffer rejects the push, yields and
// retries until it succeeds; the item is counted only after it is stored.
void Push(const T& item) {
while(!m_queue.push(item))
std::this_thread::yield();
m_count++;
}
// Blocks the caller until the pending count is no longer positive.
void Wait() {
// Unlocked fast path: skip the mutex entirely when nothing is pending.
if (m_count > 0) {
std::unique_lock<std::mutex> l(m_lock);
// Re-check in a loop after every wake-up of the condition variable.
while (m_count > 0) {
m_empty.wait(l);
}
}
ASSERT(m_count == 0);
}
// Functor entry point used by consume_one(*this): forwards to Process().
void operator() (T& item) {
this->Process(item);
}
};
// This queue reserves the RENDERING threads + the GS thread on dedicated CPU cores
// pros: best fps per thread
// cons: requires (1 + eThreads) cores for GS emulation alone! Reserved for 8-core CPUs.
// The whole variant is compiled out by the '#if 0' below.
#if 0
template<class T> class GSJobQueue : public IGSJobQueue<T>
{
protected:
// Count of pending items: incremented by Push(), decremented by the worker.
std::atomic<int16_t> m_count;
// Set by the destructor; the worker checks it only while it has no work.
std::atomic<bool> m_exit;
// Single-producer/single-consumer lock-free queue with a fixed capacity of 255.
boost::lockfree::spsc_queue<T, boost::lockfree::capacity<255> > m_queue;
// Worker thread body: spin (yielding) until work is counted, then drain the
// queue and subtract the number of items consume_all() reports.
void ThreadProc() {
while (true) {
while (m_count == 0) {
if (m_exit.load(memory_order_acquire)) return;
std::this_thread::yield();
}
// consume_all(*this) invokes operator() on this object for each item.
m_count -= m_queue.consume_all(*this);
}
}
public:
// Starts with an empty queue and immediately launches the worker thread.
GSJobQueue() :
m_count(0),
m_exit(false)
{
CreateThread();
};
// Requests the worker to exit, then closes the thread.
virtual ~GSJobQueue() {
m_exit = true;
CloseThread();
}
// True when no pushed item is still pending.
bool IsEmpty() const {
ASSERT(m_count >= 0);
return m_count == 0;
}
// Counts the item first, then yields and retries until the queue accepts it.
void Push(const T& item) {
m_count++;
while(!m_queue.push(item))
std::this_thread::yield();
}
// Busy-waits (yielding) until the worker has brought the count back to zero;
// no mutex or condition variable is involved in this variant.
void Wait() {
while (m_count > 0)
std::this_thread::yield();
ASSERT(m_count == 0);
}
// Implemented by the derived worker to handle a single queued item.
virtual void Process(T& item) = 0;
// Functor entry point used by consume_all(*this): forwards to Process().
void operator() (T& item) {
this->Process(item);
}
};
#endif