mirror of
https://github.com/hrydgard/ppsspp.git
synced 2024-11-26 23:10:38 +00:00
Address feedback (except the mailbox refcount)
This commit is contained in:
parent
1d59560409
commit
81f0c3a8e4
@ -26,19 +26,14 @@ WaitableCounter *ParallelRangeLoopWaitable(ThreadManager *threadMan, const std::
|
||||
}
|
||||
|
||||
int numTasks = threadMan->GetNumLooperThreads();
|
||||
|
||||
int range = upper - lower;
|
||||
if (range <= 0) {
|
||||
// Bad range. A finished counter allocated.
|
||||
// Nothing to do. An already-finished counter is allocated so the API stays the same.
|
||||
return new WaitableCounter(0);
|
||||
}
|
||||
|
||||
if (range <= numTasks) {
|
||||
// Just assign one task per thread, as many as we have.
|
||||
WaitableCounter *waitableCounter = new WaitableCounter(range);
|
||||
for (int i = 0; i < range; i++) {
|
||||
threadMan->EnqueueTaskOnThread(i, new LoopRangeTask(waitableCounter, loop, i, i + 1), TaskType::CPU_COMPUTE);
|
||||
}
|
||||
} else if (range <= minSize) {
|
||||
// Single background task.
|
||||
WaitableCounter *waitableCounter = new WaitableCounter(1);
|
||||
threadMan->EnqueueTaskOnThread(0, new LoopRangeTask(waitableCounter, loop, lower, upper), TaskType::CPU_COMPUTE);
|
||||
return waitableCounter;
|
||||
} else {
|
||||
// Split the range between threads. Allow for some fractional bits.
|
||||
@ -68,7 +63,7 @@ WaitableCounter *ParallelRangeLoopWaitable(ThreadManager *threadMan, const std::
|
||||
}
|
||||
threadMan->EnqueueTaskOnThread(i, new LoopRangeTask(waitableCounter, loop, start, end), TaskType::CPU_COMPUTE);
|
||||
counter += delta;
|
||||
if ((counter >> fractionalBits) > upper) {
|
||||
if ((counter >> fractionalBits) >= upper) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -78,7 +73,6 @@ WaitableCounter *ParallelRangeLoopWaitable(ThreadManager *threadMan, const std::
|
||||
int stragglerStart = (int)(counter >> fractionalBits);
|
||||
int stragglerEnd = upper;
|
||||
if (stragglerStart < stragglerEnd) {
|
||||
// printf("doing stragglers: %d-%d\n", start, upper);
|
||||
loop(stragglerStart, stragglerEnd);
|
||||
}
|
||||
return waitableCounter;
|
||||
@ -114,14 +108,13 @@ void ParallelMemcpy(ThreadManager *threadMan, void *dst, const void *src, size_t
|
||||
return;
|
||||
}
|
||||
|
||||
// 128 is the largest cacheline size on common CPUs.
|
||||
// Still I suspect that the optimal minSize is a lot higher.
|
||||
// unknown's testing showed that 128kB is an appropriate minimum size.
|
||||
|
||||
char *d = (char *)dst;
|
||||
char *s = (char *)src;
|
||||
const char *s = (const char *)src;
|
||||
ParallelRangeLoop(threadMan, [&](int l, int h) {
|
||||
memmove(d + l, s + l, h - l);
|
||||
}, 0, (int)bytes, 128);
|
||||
}, 0, (int)bytes, 128 * 1024);
|
||||
}
|
||||
|
||||
// NOTE: Supports a max of 2GB.
|
||||
@ -132,11 +125,10 @@ void ParallelMemset(ThreadManager *threadMan, void *dst, uint8_t value, size_t b
|
||||
return;
|
||||
}
|
||||
|
||||
// 128 is the largest cacheline size on common CPUs.
|
||||
// Still I suspect that the optimal minSize is a lot higher.
|
||||
// unknown's testing showed that 128kB is an appropriate minimum size.
|
||||
|
||||
char *d = (char *)dst;
|
||||
ParallelRangeLoop(threadMan, [&](int l, int h) {
|
||||
memset(d + l, value, h - l);
|
||||
}, 0, (int)bytes, 128);
|
||||
}, 0, (int)bytes, 128 * 1024);
|
||||
}
|
||||
|
@ -6,7 +6,7 @@
|
||||
|
||||
#include "Common/Thread/ThreadManager.h"
|
||||
|
||||
// Same as the latch from C++20, just counting upwards for no particular reason.
|
||||
// Same as the latch from C++20.
|
||||
struct WaitableCounter : public Waitable {
|
||||
public:
|
||||
WaitableCounter(int count) : count_(count) {}
|
||||
@ -25,10 +25,9 @@ public:
|
||||
|
||||
void Wait() override {
|
||||
std::unique_lock<std::mutex> lock(mutex_);
|
||||
if (count_ == 0) {
|
||||
return;
|
||||
while (count_ != 0) {
|
||||
cond_.wait(lock);
|
||||
}
|
||||
cond_.wait(lock);
|
||||
}
|
||||
|
||||
int count_;
|
||||
@ -44,6 +43,6 @@ void ParallelRangeLoop(ThreadManager *threadMan, const std::function<void(int, i
|
||||
|
||||
// Common utilities for large (!) memory copies.
|
||||
// Will only fall back to threads if it seems to make sense.
|
||||
|
||||
// NOTE: These support a max of 2GB.
|
||||
void ParallelMemcpy(ThreadManager *threadMan, void *dst, const void *src, size_t bytes);
|
||||
void ParallelMemset(ThreadManager *threadMan, void *dst, uint8_t value, size_t bytes);
|
||||
|
@ -28,7 +28,7 @@ struct GlobalThreadContext {
|
||||
std::deque<Task *> queue;
|
||||
std::vector<ThreadContext *> threads_;
|
||||
|
||||
int roundRobin;
|
||||
int roundRobin = 0;
|
||||
};
|
||||
|
||||
struct ThreadContext {
|
||||
|
@ -17,7 +17,6 @@ public:
|
||||
virtual void Run() = 0;
|
||||
virtual bool Cancellable() { return false; }
|
||||
virtual void Cancel() {}
|
||||
virtual float Priority() { return 1.0f; }
|
||||
virtual uint64_t id() { return 0; }
|
||||
};
|
||||
|
||||
@ -53,8 +52,8 @@ public:
|
||||
// something meaningful yourself.
|
||||
void TryCancelTask(uint64_t id);
|
||||
|
||||
// Parallel loops get to use half the threads,
|
||||
// so we still have some worker threads for other tasks.
|
||||
// Parallel loops (assumed compute-limited) get one thread per logical core. We have a few extra threads too
|
||||
// for I/O-bound tasks, which can run concurrently with those.
|
||||
int GetNumLooperThreads() const;
|
||||
|
||||
private:
|
||||
|
@ -16,16 +16,17 @@
|
||||
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
||||
|
||||
#include <algorithm>
|
||||
#include "Common/GPU/OpenGL/GLCommon.h"
|
||||
|
||||
#include "GPU/Common/TextureScalerCommon.h"
|
||||
#include "GPU/GLES/TextureScalerGLES.h"
|
||||
#include "Common/Data/Convert/ColorConv.h"
|
||||
#include "Common/Log.h"
|
||||
#include "Common/Thread/ParallelLoop.h"
|
||||
#include "Core/ThreadPools.h"
|
||||
#include "Common/GPU/OpenGL/GLCommon.h"
|
||||
#include "Common/GPU/DataFormat.h"
|
||||
|
||||
#include "Core/ThreadPools.h"
|
||||
#include "GPU/Common/TextureScalerCommon.h"
|
||||
#include "GPU/GLES/TextureScalerGLES.h"
|
||||
|
||||
int TextureScalerGLES::BytesPerPixel(u32 format) {
|
||||
return ((Draw::DataFormat)format == Draw::DataFormat::R8G8B8A8_UNORM) ? 4 : 2;
|
||||
}
|
||||
@ -42,15 +43,15 @@ void TextureScalerGLES::ConvertTo8888(u32 format, u32* source, u32* &dest, int w
|
||||
break;
|
||||
|
||||
case Draw::DataFormat::R4G4B4A4_UNORM_PACK16:
|
||||
ParallelRangeLoop(&g_threadManager, std::bind(&convert4444_gl, (u16*)source, dest, width, std::placeholders::_1, std::placeholders::_2), 0, height, 1);
|
||||
ParallelRangeLoop(&g_threadManager, std::bind(&convert4444_gl, (u16*)source, dest, width, std::placeholders::_1, std::placeholders::_2), 0, height, MIN_TEXSCALE_LINES_PER_THREAD);
|
||||
break;
|
||||
|
||||
case Draw::DataFormat::R5G6B5_UNORM_PACK16:
|
||||
ParallelRangeLoop(&g_threadManager, std::bind(&convert565_gl, (u16*)source, dest, width, std::placeholders::_1, std::placeholders::_2), 0, height, 1);
|
||||
ParallelRangeLoop(&g_threadManager, std::bind(&convert565_gl, (u16*)source, dest, width, std::placeholders::_1, std::placeholders::_2), 0, height, MIN_TEXSCALE_LINES_PER_THREAD);
|
||||
break;
|
||||
|
||||
case Draw::DataFormat::R5G5B5A1_UNORM_PACK16:
|
||||
ParallelRangeLoop(&g_threadManager, std::bind(&convert5551_gl, (u16*)source, dest, width, std::placeholders::_1, std::placeholders::_2), 0, height, 1);
|
||||
ParallelRangeLoop(&g_threadManager, std::bind(&convert5551_gl, (u16*)source, dest, width, std::placeholders::_1, std::placeholders::_2), 0, height, MIN_TEXSCALE_LINES_PER_THREAD);
|
||||
break;
|
||||
|
||||
default:
|
||||
|
@ -42,23 +42,21 @@ u32 TextureScalerVulkan::Get8888Format() {
|
||||
}
|
||||
|
||||
void TextureScalerVulkan::ConvertTo8888(u32 format, u32* source, u32* &dest, int width, int height) {
|
||||
const int MIN_LINES_PER_THREAD = 4;
|
||||
|
||||
switch (format) {
|
||||
case VULKAN_8888_FORMAT:
|
||||
dest = source; // already fine
|
||||
break;
|
||||
|
||||
case VULKAN_4444_FORMAT:
|
||||
ParallelRangeLoop(&g_threadManager, std::bind(&convert4444_dx9, (u16*)source, dest, width, std::placeholders::_1, std::placeholders::_2), 0, height, MIN_LINES_PER_THREAD);
|
||||
ParallelRangeLoop(&g_threadManager, std::bind(&convert4444_dx9, (u16*)source, dest, width, std::placeholders::_1, std::placeholders::_2), 0, height, MIN_TEXSCALE_LINES_PER_THREAD);
|
||||
break;
|
||||
|
||||
case VULKAN_565_FORMAT:
|
||||
ParallelRangeLoop(&g_threadManager, std::bind(&convert565_dx9, (u16*)source, dest, width, std::placeholders::_1, std::placeholders::_2), 0, height, MIN_LINES_PER_THREAD);
|
||||
ParallelRangeLoop(&g_threadManager, std::bind(&convert565_dx9, (u16*)source, dest, width, std::placeholders::_1, std::placeholders::_2), 0, height, MIN_TEXSCALE_LINES_PER_THREAD);
|
||||
break;
|
||||
|
||||
case VULKAN_1555_FORMAT:
|
||||
ParallelRangeLoop(&g_threadManager, std::bind(&convert5551_dx9, (u16*)source, dest, width, std::placeholders::_1, std::placeholders::_2), 0, height, MIN_LINES_PER_THREAD);
|
||||
ParallelRangeLoop(&g_threadManager, std::bind(&convert5551_dx9, (u16*)source, dest, width, std::placeholders::_1, std::placeholders::_2), 0, height, MIN_TEXSCALE_LINES_PER_THREAD);
|
||||
break;
|
||||
|
||||
default:
|
||||
|
@ -630,15 +630,6 @@ handleELF:
|
||||
// INFO_LOG(SYSTEM, "Completed writing info for %s", info_->GetTitle().c_str());
|
||||
}
|
||||
|
||||
float Priority() override {
|
||||
auto fl = info_->GetFileLoader();
|
||||
if (fl && fl->IsRemote()) {
|
||||
// Increase the value so remote info loads after non-remote.
|
||||
return info_->lastAccessedTime + 1000.0f;
|
||||
}
|
||||
return info_->lastAccessedTime;
|
||||
}
|
||||
|
||||
private:
|
||||
Path gamePath_;
|
||||
std::shared_ptr<GameInfo> info_;
|
||||
|
@ -14,11 +14,13 @@
|
||||
#include "Common/System/NativeApp.h"
|
||||
#include "Common/System/System.h"
|
||||
|
||||
#include "Common/CPUDetect.h"
|
||||
#include "Common/File/VFS/VFS.h"
|
||||
#include "Common/File/VFS/AssetReader.h"
|
||||
#include "Common/File/FileUtil.h"
|
||||
#include "Common/GraphicsContext.h"
|
||||
#include "Common/TimeUtil.h"
|
||||
#include "Common/Thread/ThreadManager.h"
|
||||
#include "Core/Config.h"
|
||||
#include "Core/ConfigValues.h"
|
||||
#include "Core/Core.h"
|
||||
@ -330,6 +332,8 @@ int main(int argc, const char* argv[])
|
||||
if (testFilenames.empty())
|
||||
return printUsage(argv[0], argc <= 1 ? NULL : "No executables specified");
|
||||
|
||||
g_threadManager.Init(cpu_info.num_cores, cpu_info.logical_cpu_count);
|
||||
|
||||
LogManager::Init(&g_Config.bEnableLogging);
|
||||
LogManager *logman = LogManager::GetInstance();
|
||||
|
||||
|
@ -49,6 +49,10 @@ bool TestParallelLoop(ThreadManager *threadMan) {
|
||||
// Try a loop with a relatively large minimum size.
|
||||
printf("blocking test #2 [0-100)\n");
|
||||
ParallelRangeLoop(threadMan, rangeFunc, 0, 100, 40);
|
||||
// Try a loop with minimum size larger than range.
|
||||
printf("waitable test [10-30)\n");
|
||||
WaitableCounter *waitable2 = ParallelRangeLoopWaitable(threadMan, rangeFunc, 10, 30, 40);
|
||||
waitable2->WaitAndRelease();
|
||||
return true;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user