Align some vertex arrays to page size. This is reportedly beneficial in some cases (graphics drivers may appreciate the aligned memory).

2025-02-02 11:43:31 +00:00 · 2013-01-29 00:48:13 +01:00 · 2013-01-29 00:48:13 +01:00 · 366583d34f
commit 366583d34f
parent 630c025fd5
5 changed files with 41 additions and 39 deletions
--- a/GPU/GLES/DisplayListInterpreter.cpp
+++ b/GPU/GLES/DisplayListInterpreter.cpp
@ -163,13 +163,11 @@ const int flushBeforeCommandList[] = {
 	*/
 };

-GLES_GPU::GLES_GPU(int renderWidth, int renderHeight)
+GLES_GPU::GLES_GPU()
 :		interruptsEnabled_(true),
 		displayFramebufPtr_(0),
 		prevDisplayFramebuf_(0),
 		prevPrevDisplayFramebuf_(0),
-		renderWidth_(renderWidth),
-		renderHeight_(renderHeight),
 		resized_(false)
 {
 	shaderManager_ = new ShaderManager();
--- a/GPU/GLES/DisplayListInterpreter.h
+++ b/GPU/GLES/DisplayListInterpreter.h
@ -32,7 +32,7 @@ class LinkedShader;
 class GLES_GPU : public GPUCommon
 {
 public:
-	GLES_GPU(int renderWidth, int renderHeight);
+	GLES_GPU();
 	~GLES_GPU();
 	virtual void InitClear();
 	virtual void PreExecuteOp(u32 op, u32 diff);
@ -81,9 +81,6 @@ private:
 	u32 displayStride_;
 	int displayFormat_;

-	int renderWidth_;
-	int renderHeight_;
-
 	struct CmdProcessorState {
 		u32 pc;
 		u32 stallAddr;
--- a/GPU/GLES/TransformPipeline.cpp
+++ b/GPU/GLES/TransformPipeline.cpp
@ -17,6 +17,7 @@

 #include "base/timeutil.h"

+#include "Common/MemoryUtil.h"
 #include "../../Core/MemMap.h"
 #include "../../Core/Host.h"
 #include "../../Core/System.h"
@ -45,6 +46,12 @@ const GLuint glprim[8] = {
 	GL_TRIANGLES,	 // With OpenGL ES we have to expand sprites into triangles, tripling the data instead of doubling. sigh. OpenGL ES, Y U NO SUPPORT GL_QUADS?
 };

+enum {
+	DECODED_VERTEX_BUFFER_SIZE = 65536 * 48,
+	DECODED_INDEX_BUFFER_SIZE = 65536 * 2,
+	TRANSFORMED_VERTEX_BUFFER_SIZE = 65536 * sizeof(TransformedVertex)
+};
+
 TransformDrawEngine::TransformDrawEngine()
 	: numDrawCalls(0),
 	  collectedVerts(0),
@ -52,10 +59,13 @@ TransformDrawEngine::TransformDrawEngine()
 		lastVType_(-1),
 		curVbo_(0),
 		shaderManager_(0) {
-	decoded = new u8[65536 * 48];
-	decIndex = new u16[65536];
-	transformed = new TransformedVertex[65536];
-	transformedExpanded = new TransformedVertex[65536 * 3];
+	// Allocate nicely aligned memory. Maybe graphics drivers will
+	// appreciate it.
+	// All this is a LOT of memory, need to see if we can cut down somehow.
+	decoded = (u8 *)AllocateMemoryPages(DECODED_VERTEX_BUFFER_SIZE);
+	decIndex = (u16 *)AllocateMemoryPages(DECODED_INDEX_BUFFER_SIZE);
+	transformed = (TransformedVertex *)AllocateMemoryPages(TRANSFORMED_VERTEX_BUFFER_SIZE);
+	transformedExpanded = (TransformedVertex *)AllocateMemoryPages(3 * TRANSFORMED_VERTEX_BUFFER_SIZE);
 	memset(vbo_, 0, sizeof(vbo_));
 	memset(ebo_, 0, sizeof(ebo_));
 	indexGen.Setup(decIndex);
@ -65,10 +75,10 @@ TransformDrawEngine::TransformDrawEngine()

 TransformDrawEngine::~TransformDrawEngine() {
 	DestroyDeviceObjects();
-	delete [] decoded;
-	delete [] decIndex;
-	delete [] transformed;
-	delete [] transformedExpanded;
+	FreeMemoryPages(decoded, DECODED_VERTEX_BUFFER_SIZE);
+	FreeMemoryPages(decIndex, DECODED_INDEX_BUFFER_SIZE);
+	FreeMemoryPages(transformed, TRANSFORMED_VERTEX_BUFFER_SIZE);
+	FreeMemoryPages(transformedExpanded, 3 * TRANSFORMED_VERTEX_BUFFER_SIZE);
 	unregister_gl_resource_holder(this);
 }

@ -293,7 +303,7 @@ struct GlTypeInfo {
 	GLboolean normalized;
 };

-const GlTypeInfo GLComp[8] = {
+const GlTypeInfo GLComp[] = {
 	{0}, // 	DEC_NONE,
 	{GL_FLOAT, 1, GL_FALSE}, // 	DEC_FLOAT_1,
 	{GL_FLOAT, 2, GL_FALSE}, // 	DEC_FLOAT_2,
@ -302,6 +312,7 @@ const GlTypeInfo GLComp[8] = {
 	{GL_BYTE, 4, GL_TRUE}, // 	DEC_S8_3,
 	{GL_SHORT, 4, GL_TRUE},// 	DEC_S16_3,
 	{GL_UNSIGNED_BYTE, 4, GL_TRUE},// 	DEC_U8_4,
+	{GL_UNSIGNED_BYTE, 3, GL_TRUE},// 	DEC_U8_3,
 };

 static inline void VertexAttribSetup(int attrib, int fmt, int stride, u8 *ptr) {
--- a/GPU/GLES/VertexDecoder.cpp
+++ b/GPU/GLES/VertexDecoder.cpp
@ -125,8 +125,7 @@ void VertexDecoder::Step_WeightsFloat() const
 {
 	float *wt = (float *)(decoded_ + decFmt.w0off);
 	const float *wdata = (const float*)(ptr_);
-	for (int j = 0; j < nweights; j++)
-		wt[j] = wdata[j];
+	memcpy(wt, wdata, nweights * sizeof(float));
 }

 void VertexDecoder::Step_TcU8() const
@ -157,8 +156,7 @@ void VertexDecoder::Step_TcFloat() const
 {
 	float *uv = (float *)(decoded_ + decFmt.uvoff);
 	const float *uvdata = (const float*)(ptr_ + tcoff);
-	uv[0] = uvdata[0];
-	uv[1] = uvdata[1];
+	memcpy(uv, uvdata, sizeof(float) * 2);
 }

 void VertexDecoder::Step_TcFloatThrough() const
@ -377,10 +375,9 @@ void VertexDecoder::Step_PosS16() const

 void VertexDecoder::Step_PosFloat() const
 {
-	float *v = (float *)(decoded_ + decFmt.posoff);
-	const float *fv = (const float*)(ptr_ + posoff);
-	for (int j = 0; j < 3; j++)
-		v[j] = fv[j];
+	u8 *v = (u8 *)(decoded_ + decFmt.posoff);
+	const u8 *fv = (const u8*)(ptr_ + posoff);
+	memcpy(v, fv, 12);
 }

 void VertexDecoder::Step_PosS8Through() const
@ -405,10 +402,9 @@ void VertexDecoder::Step_PosS16Through() const

 void VertexDecoder::Step_PosFloatThrough() const
 {
-	float *v = (float *)(decoded_ + decFmt.posoff);
-	const float *fv = (const float*)(ptr_ + posoff);
-	for (int j = 0; j < 3; j++)
-		v[j] = fv[j];
+	u8 *v = (u8 *)(decoded_ + decFmt.posoff);
+	const u8 *fv = (const u8*)(ptr_ + posoff);
+	memcpy(v, fv, 12);
 }

 void VertexDecoder::Step_PosS8Morph() const
@ -445,21 +441,21 @@ void VertexDecoder::Step_PosFloatMorph() const
 	}
 }

-const StepFunction wtstep[4] = {
+static const StepFunction wtstep[4] = {
 	0,
 	&VertexDecoder::Step_WeightsU8,
 	&VertexDecoder::Step_WeightsU16,
 	&VertexDecoder::Step_WeightsFloat,
 };

-const StepFunction tcstep[4] = {
+static const StepFunction tcstep[4] = {
 	0,
 	&VertexDecoder::Step_TcU8,
 	&VertexDecoder::Step_TcU16,
 	&VertexDecoder::Step_TcFloat,
 };

-const StepFunction tcstep_through[4] = {
+static const StepFunction tcstep_through[4] = {
 	0,
 	&VertexDecoder::Step_TcU8,
 	&VertexDecoder::Step_TcU16Through,
@ -468,7 +464,7 @@ const StepFunction tcstep_through[4] = {

 // TODO: Tc Morph

-const StepFunction colstep[8] = {
+static const StepFunction colstep[8] = {
 	0, 0, 0, 0,
 	&VertexDecoder::Step_Color565,
 	&VertexDecoder::Step_Color5551,
@ -476,7 +472,7 @@ const StepFunction colstep[8] = {
 	&VertexDecoder::Step_Color8888,
 };

-const StepFunction colstep_morph[8] = {
+static const StepFunction colstep_morph[8] = {
 	0, 0, 0, 0,
 	&VertexDecoder::Step_Color565Morph,
 	&VertexDecoder::Step_Color5551Morph,
@ -484,35 +480,35 @@ const StepFunction colstep_morph[8] = {
 	&VertexDecoder::Step_Color8888Morph,
 };

-const StepFunction nrmstep[4] = {
+static const StepFunction nrmstep[4] = {
 	0,
 	&VertexDecoder::Step_NormalS8,
 	&VertexDecoder::Step_NormalS16,
 	&VertexDecoder::Step_NormalFloat,
 };

-const StepFunction nrmstep_morph[4] = {
+static const StepFunction nrmstep_morph[4] = {
 	0,
 	&VertexDecoder::Step_NormalS8Morph,
 	&VertexDecoder::Step_NormalS16Morph,
 	&VertexDecoder::Step_NormalFloatMorph,
 };

-const StepFunction posstep[4] = {
+static const StepFunction posstep[4] = {
 	0,
 	&VertexDecoder::Step_PosS8,
 	&VertexDecoder::Step_PosS16,
 	&VertexDecoder::Step_PosFloat,
 };

-const StepFunction posstep_morph[4] = {
+static const StepFunction posstep_morph[4] = {
 	0,
 	&VertexDecoder::Step_PosS8Morph,
 	&VertexDecoder::Step_PosS16Morph,
 	&VertexDecoder::Step_PosFloatMorph,
 };

-const StepFunction posstep_through[4] = {
+static const StepFunction posstep_through[4] = {
 	0,
 	&VertexDecoder::Step_PosS8Through,
 	&VertexDecoder::Step_PosS16Through,
--- a/GPU/GPUState.cpp
+++ b/GPU/GPUState.cpp
@ -61,7 +61,7 @@ void InitGfxState()
 		gpu = new NullGPU();
 		break;
 	case GPU_GLES:
-		gpu = new GLES_GPU(PSP_CoreParameter().renderWidth, PSP_CoreParameter().renderHeight);
+		gpu = new GLES_GPU();
 		break;
 	}
 }