mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-02-22 07:03:38 +00:00
Parallelized texture scaling
This commit is contained in:
parent
5e918a644f
commit
af68180319
@ -34,4 +34,48 @@ std::string MemUsage();
|
||||
|
||||
// Returns the page size used by callers of this header.
// This is a fixed constant (4096 bytes); nothing is queried at runtime.
inline int GetPageSize() {
	return 4096;
}
|
||||
|
||||
template <typename T>
|
||||
class SimpleBuf {
|
||||
public:
|
||||
SimpleBuf() : buf_(NULL), size_(0) {
|
||||
}
|
||||
|
||||
SimpleBuf(size_t size) : buf_(NULL) {
|
||||
resize(size);
|
||||
}
|
||||
|
||||
~SimpleBuf() {
|
||||
if (buf_ != NULL) {
|
||||
FreeMemoryPages(buf_, size_ * sizeof(T));
|
||||
}
|
||||
}
|
||||
|
||||
inline T &operator[](size_t index) {
|
||||
return buf_[index];
|
||||
}
|
||||
|
||||
// Doesn't preserve contents.
|
||||
void resize(size_t size) {
|
||||
if (size_ < size) {
|
||||
if (buf_ != NULL) {
|
||||
FreeMemoryPages(buf_, size_ * sizeof(T));
|
||||
}
|
||||
buf_ = (T *)AllocateMemoryPages(size * sizeof(T));
|
||||
size_ = size;
|
||||
}
|
||||
}
|
||||
|
||||
T *data() {
|
||||
return buf_;
|
||||
}
|
||||
|
||||
size_t size() {
|
||||
return size_;
|
||||
}
|
||||
|
||||
private:
|
||||
T *buf_;
|
||||
size_t size_;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -18,10 +18,92 @@
|
||||
#include "TextureScaler.h"
|
||||
|
||||
#include "Core/Config.h"
|
||||
#include "Common/Log.h"
|
||||
#include "Common/MsgHandler.h"
|
||||
#include "Common/CommonFuncs.h"
|
||||
#include "ext/xbrz/xbrz.h"
|
||||
|
||||
// Spawns the worker thread and returns only after that thread has entered
// WorkFunc() and set `started`.
WorkerThread::WorkerThread() : active(true), started(false) {
	// The thread body simply runs WorkFunc() on this object.
	thread = new std::thread([this]() { WorkFunc(); });

	// doneMutex is acquired here and is not released in this function;
	// WaitForCompletion() later hands it to done.wait().
	doneMutex.lock();

	// Busy-wait until WorkFunc() has flagged that it is running.
	while (!started) {
	}
}
|
||||
|
||||
// Asks the worker thread to stop, waits for it to exit, then frees it.
WorkerThread::~WorkerThread() {
	// Clear the run flag and wake the worker while holding the work mutex,
	// so the worker observes `active == false` when its wait returns.
	mutex.lock();
	active = false;
	signal.notify_one();
	mutex.unlock();

	// join() blocks until WorkFunc() has returned.
	thread->join();
	delete thread;
}
|
||||
|
||||
// Submits a work item: stores a copy of `work` in work_ and signals the
// worker thread. Both steps happen under `mutex`, which is the mutex the
// worker passes to signal.wait().
void WorkerThread::Process(const std::function<void()>& work) {
	mutex.lock();
	work_ = work;          // copy; the caller's functor need not outlive the call
	signal.notify_one();   // wake the worker blocked in signal.wait()
	mutex.unlock();
}
|
||||
|
||||
// Blocks the caller on the `done` condition, using doneMutex (which the
// constructor locked) as the associated mutex. WorkFunc() notifies `done`
// after running a work item.
// NOTE(review): there is no predicate around the wait, so this relies on the
// wait only returning because of that notification — confirm against the
// condition_variable implementation in use.
void WorkerThread::WaitForCompletion() {
	done.wait(doneMutex);
}
|
||||
|
||||
void WorkerThread::WorkFunc() {
|
||||
mutex.lock();
|
||||
started = true;
|
||||
while(active) {
|
||||
signal.wait(mutex);
|
||||
if(active) work_();
|
||||
doneMutex.lock();
|
||||
done.notify_one();
|
||||
doneMutex.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Sets the worker count to a fixed 4 and marks the workers as not yet
// created; the threads themselves are created in StartWorkers().
TextureScaler::TextureScaler()
	: numThreads(4)
	, workersStarted(false) {
}
|
||||
|
||||
void TextureScaler::StartWorkers() {
|
||||
if(!workersStarted) {
|
||||
for(int i=0; i<numThreads; ++i) {
|
||||
workers.push_back(std::make_shared<WorkerThread>());
|
||||
}
|
||||
workersStarted = true;
|
||||
}
|
||||
}
|
||||
|
||||
void TextureScaler::ParallelLoop(std::function<void(int,int)> loop, int lower, int upper) {
|
||||
StartWorkers();
|
||||
int range = upper-lower;
|
||||
if(range >= numThreads*2) { // don't parallelize tiny loops
|
||||
// could do slightly better load balancing for the generic case,
|
||||
// but doesn't matter since all our loops are power of 2
|
||||
int chunk = range/numThreads;
|
||||
for(int s=lower, i=0; i<numThreads; s+=chunk, ++i) {
|
||||
workers[i]->Process(std::bind(loop, s, std::min(s+chunk,upper)));
|
||||
}
|
||||
for(int i=0; i<numThreads; ++i) {
|
||||
workers[i]->WaitForCompletion();
|
||||
}
|
||||
} else {
|
||||
loop(lower, upper);
|
||||
}
|
||||
}
|
||||
|
||||
//#define SCALING_MEASURE_TIME
|
||||
|
||||
#ifdef SCALING_MEASURE_TIME
|
||||
#include "native/base/timeutil.h"
|
||||
#endif
|
||||
|
||||
void TextureScaler::Scale(u32* &data, GLenum &dstFmt, int &width, int &height) {
|
||||
if(g_Config.iXBRZTexScalingLevel > 1) {
|
||||
#ifdef SCALING_MEASURE_TIME
|
||||
double t_start = real_time_now();
|
||||
#endif
|
||||
|
||||
int factor = g_Config.iXBRZTexScalingLevel;
|
||||
|
||||
// depending on the factor and texture sizes, these can be pretty large (25 MB for a 512 by 512 texture with scaling factor 5)
|
||||
@ -37,52 +119,71 @@ void TextureScaler::Scale(u32* &data, GLenum &dstFmt, int &width, int &height) {
|
||||
break;
|
||||
|
||||
case GL_UNSIGNED_SHORT_4_4_4_4:
|
||||
for(int y = 0; y < height; ++y) {
|
||||
for(int x = 0; x < width; ++x) {
|
||||
u32 val = ((u16*)data)[y*width + x];
|
||||
u32 r = ((val>>12) & 0xF) * 17;
|
||||
u32 g = ((val>> 8) & 0xF) * 17;
|
||||
u32 b = ((val>> 4) & 0xF) * 17;
|
||||
u32 a = ((val>> 0) & 0xF) * 17;
|
||||
xbrzInputBuf[y*width + x] = (a << 24) | (b << 16) | (g << 8) | r;
|
||||
ParallelLoop([&](int l, int u){
|
||||
for(int y = l; y < u; ++y) {
|
||||
for(int x = 0; x < width; ++x) {
|
||||
u32 val = ((u16*)data)[y*width + x];
|
||||
u32 r = ((val>>12) & 0xF) * 17;
|
||||
u32 g = ((val>> 8) & 0xF) * 17;
|
||||
u32 b = ((val>> 4) & 0xF) * 17;
|
||||
u32 a = ((val>> 0) & 0xF) * 17;
|
||||
xbrzInputBuf[y*width + x] = (a << 24) | (b << 16) | (g << 8) | r;
|
||||
}
|
||||
}
|
||||
}
|
||||
}, 0, height);
|
||||
break;
|
||||
|
||||
case GL_UNSIGNED_SHORT_5_6_5:
|
||||
for(int y = 0; y < height; ++y) {
|
||||
for(int x = 0; x < width; ++x) {
|
||||
u32 val = ((u16*)data)[y*width + x];
|
||||
u32 r = ((val>>11) & 0x1F) * 8;
|
||||
u32 g = ((val>> 5) & 0x3F) * 4;
|
||||
u32 b = ((val ) & 0x1F) * 8;
|
||||
xbrzInputBuf[y*width + x] = (0xFF << 24) | (b << 16) | (g << 8) | r;
|
||||
ParallelLoop([&](int l, int u){
|
||||
for(int y = l; y < u; ++y) {
|
||||
for(int x = 0; x < width; ++x) {
|
||||
u32 val = ((u16*)data)[y*width + x];
|
||||
u32 r = ((val>>11) & 0x1F) * 8;
|
||||
u32 g = ((val>> 5) & 0x3F) * 4;
|
||||
u32 b = ((val ) & 0x1F) * 8;
|
||||
xbrzInputBuf[y*width + x] = (0xFF << 24) | (b << 16) | (g << 8) | r;
|
||||
}
|
||||
}
|
||||
}
|
||||
}, 0, height);
|
||||
break;
|
||||
|
||||
case GL_UNSIGNED_SHORT_5_5_5_1:
|
||||
for(int y = 0; y < height; ++y) {
|
||||
for(int x = 0; x < width; ++x) {
|
||||
u32 val = ((u16*)data)[y*width + x];
|
||||
u32 r = ((val>>11) & 0x1F) * 8;
|
||||
u32 g = ((val>> 6) & 0x1F) * 8;
|
||||
u32 b = ((val>> 1) & 0x1F) * 8;
|
||||
u32 a = (val & 0x1) * 255;
|
||||
xbrzInputBuf[y*width + x] = (a << 24) | (b << 16) | (g << 8) | r;
|
||||
ParallelLoop([&](int l, int u) {
|
||||
for(int y = l; y < u; ++y) {
|
||||
for(int x = 0; x < width; ++x) {
|
||||
u32 val = ((u16*)data)[y*width + x];
|
||||
u32 r = ((val>>11) & 0x1F) * 8;
|
||||
u32 g = ((val>> 6) & 0x1F) * 8;
|
||||
u32 b = ((val>> 1) & 0x1F) * 8;
|
||||
u32 a = (val & 0x1) * 255;
|
||||
xbrzInputBuf[y*width + x] = (a << 24) | (b << 16) | (g << 8) | r;
|
||||
}
|
||||
}
|
||||
}
|
||||
}, 0, height);
|
||||
break;
|
||||
|
||||
default:
|
||||
ERROR_LOG(G3D, "iXBRZTexScaling: unsupported texture format");
|
||||
}
|
||||
|
||||
// scale and update values accordingly
|
||||
xbrz::scale(factor, xbrzInputBuf, xbrzBuf, width, height);
|
||||
// scale
|
||||
xbrz::ScalerCfg cfg;
|
||||
ParallelLoop([&](int l, int u) {
|
||||
xbrz::scale(factor, xbrzInputBuf, xbrzBuf, width, height, cfg, l, u);
|
||||
}, 0, height);
|
||||
|
||||
// update values accordingly
|
||||
data = xbrzBuf;
|
||||
dstFmt = GL_UNSIGNED_BYTE;
|
||||
width *= factor;
|
||||
height *= factor;
|
||||
|
||||
#ifdef SCALING_MEASURE_TIME
|
||||
if(width*height > 64*64*factor*factor) {
|
||||
double t = real_time_now() - t_start;
|
||||
NOTICE_LOG(MASTER_LOG, "TextureScaler: processed %9d pixels in %6.5lf seconds. (%9.0lf Mpixels/second)",
|
||||
width*height, t, (width*height)/(t*1000*1000));
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
@ -21,55 +21,53 @@
|
||||
#include "../Globals.h"
|
||||
#include "../native/ext/glew/GL/glew.h"
|
||||
|
||||
template <typename T>
|
||||
class SimpleBuf {
|
||||
#include <functional>
|
||||
#include <vector>
|
||||
|
||||
#include "native/thread/thread.h"
|
||||
#include "base/mutex.h"
|
||||
|
||||
// This is the simplest possible worker implementation I can think of
|
||||
// but entirely sufficient for the given purpose.
|
||||
// Only handles a single item of work at a time.
|
||||
class WorkerThread {
|
||||
public:
|
||||
SimpleBuf() : buf_(NULL), size_(0) {
|
||||
}
|
||||
WorkerThread();
|
||||
~WorkerThread();
|
||||
|
||||
SimpleBuf(size_t size) : buf_(NULL) {
|
||||
resize(size);
|
||||
}
|
||||
|
||||
~SimpleBuf() {
|
||||
if (buf_ != NULL) {
|
||||
FreeMemoryPages(buf_, size_ * sizeof(T));
|
||||
}
|
||||
}
|
||||
|
||||
inline T &operator[](size_t index) {
|
||||
return buf_[index];
|
||||
}
|
||||
|
||||
// Doesn't preserve contents.
|
||||
void resize(size_t size) {
|
||||
if (size_ < size) {
|
||||
if (buf_ != NULL) {
|
||||
FreeMemoryPages(buf_, size_ * sizeof(T));
|
||||
}
|
||||
buf_ = (T *)AllocateMemoryPages(size * sizeof(T));
|
||||
size_ = size;
|
||||
}
|
||||
}
|
||||
|
||||
T *data() {
|
||||
return buf_;
|
||||
}
|
||||
|
||||
size_t size() {
|
||||
return size_;
|
||||
}
|
||||
// submit a new work item
|
||||
void Process(const std::function<void()>& work);
|
||||
// wait for a submitted work item to be completed
|
||||
void WaitForCompletion();
|
||||
|
||||
private:
|
||||
T *buf_;
|
||||
size_t size_;
|
||||
std::thread *thread; // the worker thread
|
||||
condition_variable signal; // used to signal new work
|
||||
condition_variable done; // used to signal work completion
|
||||
recursive_mutex mutex, doneMutex; // associated with each respective condition variable
|
||||
volatile bool active, started;
|
||||
std::function<void()> work_; // the work to be done by this thread
|
||||
|
||||
void WorkFunc();
|
||||
|
||||
WorkerThread(const WorkerThread& other) { } // prevent copies
|
||||
};
|
||||
|
||||
class TextureScaler {
|
||||
public:
|
||||
TextureScaler();
|
||||
|
||||
void Scale(u32* &data, GLenum &dstfmt, int &width, int &height);
|
||||
|
||||
private:
|
||||
const int numThreads;
|
||||
std::vector<std::shared_ptr<WorkerThread>> workers;
|
||||
|
||||
bool workersStarted;
|
||||
void StartWorkers();
|
||||
|
||||
void ParallelLoop(std::function<void(int,int)> loop, int lower, int upper);
|
||||
|
||||
SimpleBuf<u32> bufInput;
|
||||
SimpleBuf<u32> bufOutput;
|
||||
};
|
||||
|
@ -25,6 +25,13 @@
|
||||
#include <limits>
|
||||
#include "config.h"
|
||||
|
||||
#ifdef max
|
||||
#undef max
|
||||
#endif
|
||||
#ifdef min
|
||||
#undef min
|
||||
#endif
|
||||
|
||||
namespace xbrz
|
||||
{
|
||||
/*
|
||||
|
Loading…
x
Reference in New Issue
Block a user