From c7edf73cdb89d046c81d36a032aad0b5a0902267 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Tue, 5 Nov 2013 00:31:00 -0800 Subject: [PATCH] Small optimizations to the vertexjit. --- GPU/GLES/VertexDecoder.cpp | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/GPU/GLES/VertexDecoder.cpp b/GPU/GLES/VertexDecoder.cpp index 395ed92ea..91126e8aa 100644 --- a/GPU/GLES/VertexDecoder.cpp +++ b/GPU/GLES/VertexDecoder.cpp @@ -1034,16 +1034,13 @@ void VertexDecoderJitCache::Jit_NormalS16() { } void VertexDecoderJitCache::Jit_NormalFloat() { - //ADD(scratchReg, srcReg, dec_->nrmoff); - //LDMIA(scratchReg, false, 3, tempReg1, tempReg2, tempReg3); - //ADD(scratchReg, dstReg, dec_->decFmt.nrmoff); - //STMIA(scratchReg, false, 3, tempReg1, tempReg2, tempReg3); + // Might not be aligned to 4, so we can't use LDMIA. LDR(tempReg1, srcReg, dec_->nrmoff); LDR(tempReg2, srcReg, dec_->nrmoff + 4); LDR(tempReg3, srcReg, dec_->nrmoff + 8); - STR(tempReg1, dstReg, dec_->decFmt.nrmoff); - STR(tempReg2, dstReg, dec_->decFmt.nrmoff + 4); - STR(tempReg3, dstReg, dec_->decFmt.nrmoff + 8); + // But this is always aligned to 4 so we're safe. + ADD(scratchReg, dstReg, dec_->decFmt.nrmoff); + STMIA(scratchReg, false, 3, tempReg1, tempReg2, tempReg3); } // Through expands into floats, always. Might want to look at changing this. @@ -1085,16 +1082,13 @@ void VertexDecoderJitCache::Jit_PosS16() { // Just copy 12 bytes. void VertexDecoderJitCache::Jit_PosFloat() { - //ADD(scratchReg, srcReg, dec_->posoff); - //LDMIA(scratchReg, false, 3, tempReg1, tempReg2, tempReg3); - //ADD(scratchReg, dstReg, dec_->decFmt.posoff); - //STMIA(scratchReg, false, 3, tempReg1, tempReg2, tempReg3); + // Might not be aligned to 4, so we can't use LDMIA. LDR(tempReg1, srcReg, dec_->posoff); LDR(tempReg2, srcReg, dec_->posoff + 4); LDR(tempReg3, srcReg, dec_->posoff + 8); - STR(tempReg1, dstReg, dec_->decFmt.posoff); - STR(tempReg2, dstReg, dec_->decFmt.posoff + 4); - STR(tempReg3, dstReg, dec_->decFmt.posoff + 8); + // But this is always aligned to 4 so we're safe. + ADD(scratchReg, dstReg, dec_->decFmt.posoff); + STMIA(scratchReg, false, 3, tempReg1, tempReg2, tempReg3); } #elif defined(_M_X64) || defined(_M_IX86) @@ -1278,10 +1272,15 @@ void VertexDecoderJitCache::Jit_TcU16Through() { } void VertexDecoderJitCache::Jit_TcFloatThrough() { +#ifdef _M_X64 + MOV(64, R(tempReg1), MDisp(srcReg, dec_->tcoff)); + MOV(64, MDisp(dstReg, dec_->decFmt.uvoff), R(tempReg1)); +#else MOV(32, R(tempReg1), MDisp(srcReg, dec_->tcoff)); MOV(32, R(tempReg2), MDisp(srcReg, dec_->tcoff + 4)); MOV(32, MDisp(dstReg, dec_->decFmt.uvoff), R(tempReg1)); MOV(32, MDisp(dstReg, dec_->decFmt.uvoff + 4), R(tempReg2)); +#endif } void VertexDecoderJitCache::Jit_Color8888() {