From c49f9695635fa4f947eb8f5082553ed2c07e89d6 Mon Sep 17 00:00:00 2001 From: hrydgard Date: Sun, 15 Feb 2009 14:46:17 +0000 Subject: [PATCH] vertexloader_pos cleanup - remove the branch per vertex. not much of a speedup though. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@2256 8ced0084-cf51-0410-be5f-012b33b47a6e --- Source/Core/VideoCommon/Src/DataReader.h | 18 +- Source/Core/VideoCommon/Src/VertexLoader.cpp | 30 ++-- .../VideoCommon/Src/VertexLoader_Position.cpp | 159 ++++++------------ .../VideoCommon/Src/VertexLoader_Position.h | 48 ++++-- 4 files changed, 119 insertions(+), 136 deletions(-) diff --git a/Source/Core/VideoCommon/Src/DataReader.h b/Source/Core/VideoCommon/Src/DataReader.h index 97340c2928..dfa776e4c3 100644 --- a/Source/Core/VideoCommon/Src/DataReader.h +++ b/Source/Core/VideoCommon/Src/DataReader.h @@ -79,10 +79,26 @@ inline u16 DataRead() return tmp; } +template <> +inline s16 DataRead() +{ + s16 tmp = (s16)Common::swap16(*(u16*)g_pVideoData); + g_pVideoData += 2; + return tmp; +} + template <> inline u32 DataRead() { - u32 tmp = Common::swap32(*(u32*)g_pVideoData); + u32 tmp = (u32)Common::swap32(*(u32*)g_pVideoData); + g_pVideoData += 4; + return tmp; +} + +template <> +inline s32 DataRead() +{ + s32 tmp = (s32)Common::swap32(*(u32*)g_pVideoData); g_pVideoData += 4; return tmp; } diff --git a/Source/Core/VideoCommon/Src/VertexLoader.cpp b/Source/Core/VideoCommon/Src/VertexLoader.cpp index 084eba7ed1..114783c546 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader.cpp +++ b/Source/Core/VideoCommon/Src/VertexLoader.cpp @@ -211,22 +211,22 @@ void VertexLoader::CompileVertexTranslator() case NOT_PRESENT: {_assert_msg_(0, "Vertex descriptor without position!", "WTF?");} break; case DIRECT: switch (m_VtxAttr.PosFormat) { - case FORMAT_UBYTE: m_VertexSize += m_VtxAttr.PosElements?3:2; WriteCall(Pos_ReadDirect_UByte); break; - case FORMAT_BYTE: m_VertexSize += m_VtxAttr.PosElements?3:2; WriteCall(Pos_ReadDirect_Byte); break; - case 
FORMAT_USHORT: m_VertexSize += m_VtxAttr.PosElements?6:4; WriteCall(Pos_ReadDirect_UShort); break; - case FORMAT_SHORT: m_VertexSize += m_VtxAttr.PosElements?6:4; WriteCall(Pos_ReadDirect_Short); break; - case FORMAT_FLOAT: m_VertexSize += m_VtxAttr.PosElements?12:8; WriteCall(Pos_ReadDirect_Float); break; + case FORMAT_UBYTE: m_VertexSize += m_VtxAttr.PosElements?3:2; WriteCall(m_VtxAttr.PosElements?Pos_ReadDirect_UByte3:Pos_ReadDirect_UByte2); break; + case FORMAT_BYTE: m_VertexSize += m_VtxAttr.PosElements?3:2; WriteCall(m_VtxAttr.PosElements?Pos_ReadDirect_Byte3:Pos_ReadDirect_Byte2); break; + case FORMAT_USHORT: m_VertexSize += m_VtxAttr.PosElements?6:4; WriteCall(m_VtxAttr.PosElements?Pos_ReadDirect_UShort3:Pos_ReadDirect_UShort2); break; + case FORMAT_SHORT: m_VertexSize += m_VtxAttr.PosElements?6:4; WriteCall(m_VtxAttr.PosElements?Pos_ReadDirect_Short3:Pos_ReadDirect_Short2); break; + case FORMAT_FLOAT: m_VertexSize += m_VtxAttr.PosElements?12:8; WriteCall(m_VtxAttr.PosElements?Pos_ReadDirect_Float3:Pos_ReadDirect_Float2); break; default: _assert_(0); break; } nat_offset += 12; break; case INDEX8: switch (m_VtxAttr.PosFormat) { - case FORMAT_UBYTE: WriteCall(Pos_ReadIndex8_UByte); break; //WTF? - case FORMAT_BYTE: WriteCall(Pos_ReadIndex8_Byte); break; - case FORMAT_USHORT: WriteCall(Pos_ReadIndex8_UShort); break; - case FORMAT_SHORT: WriteCall(Pos_ReadIndex8_Short); break; - case FORMAT_FLOAT: WriteCall(Pos_ReadIndex8_Float); break; + case FORMAT_UBYTE: WriteCall(m_VtxAttr.PosElements?Pos_ReadIndex8_UByte3:Pos_ReadIndex8_UByte2); break; //WTF? 
+ case FORMAT_BYTE: WriteCall(m_VtxAttr.PosElements?Pos_ReadIndex8_Byte3:Pos_ReadIndex8_Byte2); break; + case FORMAT_USHORT: WriteCall(m_VtxAttr.PosElements?Pos_ReadIndex8_UShort3:Pos_ReadIndex8_UShort2); break; + case FORMAT_SHORT: WriteCall(m_VtxAttr.PosElements?Pos_ReadIndex8_Short3:Pos_ReadIndex8_Short2); break; + case FORMAT_FLOAT: WriteCall(m_VtxAttr.PosElements?Pos_ReadIndex8_Float3:Pos_ReadIndex8_Float2); break; default: _assert_(0); break; } m_VertexSize += 1; @@ -234,11 +234,11 @@ void VertexLoader::CompileVertexTranslator() break; case INDEX16: switch (m_VtxAttr.PosFormat) { - case FORMAT_UBYTE: WriteCall(Pos_ReadIndex16_UByte); break; - case FORMAT_BYTE: WriteCall(Pos_ReadIndex16_Byte); break; - case FORMAT_USHORT: WriteCall(Pos_ReadIndex16_UShort); break; - case FORMAT_SHORT: WriteCall(Pos_ReadIndex16_Short); break; - case FORMAT_FLOAT: WriteCall(Pos_ReadIndex16_Float); break; + case FORMAT_UBYTE: WriteCall(m_VtxAttr.PosElements?Pos_ReadIndex16_UByte3:Pos_ReadIndex16_UByte2); break; + case FORMAT_BYTE: WriteCall(m_VtxAttr.PosElements?Pos_ReadIndex16_Byte3:Pos_ReadIndex16_Byte2); break; + case FORMAT_USHORT: WriteCall(m_VtxAttr.PosElements?Pos_ReadIndex16_UShort3:Pos_ReadIndex16_UShort2); break; + case FORMAT_SHORT: WriteCall(m_VtxAttr.PosElements?Pos_ReadIndex16_Short3:Pos_ReadIndex16_Short2); break; + case FORMAT_FLOAT: WriteCall(m_VtxAttr.PosElements?Pos_ReadIndex16_Float3:Pos_ReadIndex16_Float2); break; default: _assert_(0); break; } m_VertexSize += 2; diff --git a/Source/Core/VideoCommon/Src/VertexLoader_Position.cpp b/Source/Core/VideoCommon/Src/VertexLoader_Position.cpp index d0032669cc..278b32f92f 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader_Position.cpp +++ b/Source/Core/VideoCommon/Src/VertexLoader_Position.cpp @@ -76,74 +76,58 @@ MOVUPS(MOffset(EDI, 0), XMM0); // ============================================================================== // Direct // ============================================================================== 
-void LOADERDECL Pos_ReadDirect_UByte() -{ - ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)DataReadU8() * posScale; - ((float*)VertexManager::s_pCurBufferPointer)[1] = (float)DataReadU8() * posScale; - if (pVtxAttr->PosElements) - ((float*)VertexManager::s_pCurBufferPointer)[2] = (float)DataReadU8() * posScale; - else - ((float*)VertexManager::s_pCurBufferPointer)[2] = 1.0f; - LOG_VTX(); - VertexManager::s_pCurBufferPointer += 12; -} -void LOADERDECL Pos_ReadDirect_Byte() -{ - ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s8)DataReadU8() * posScale; - ((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(s8)DataReadU8() * posScale; - if (pVtxAttr->PosElements) - ((float*)VertexManager::s_pCurBufferPointer)[2] = (float)(s8)DataReadU8() * posScale; - else - ((float*)VertexManager::s_pCurBufferPointer)[2] = 1.0; - LOG_VTX(); - VertexManager::s_pCurBufferPointer += 12; -} - -void LOADERDECL Pos_ReadDirect_UShort() +template <class T, bool three> +void Pos_ReadDirect() { - ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)DataReadU16() * posScale; - ((float*)VertexManager::s_pCurBufferPointer)[1] = (float)DataReadU16() * posScale; - if (pVtxAttr->PosElements) - ((float*)VertexManager::s_pCurBufferPointer)[2] = (float)DataReadU16() * posScale; + ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(T)DataRead<T>() * posScale; + ((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(T)DataRead<T>() * posScale; + if (three) + ((float*)VertexManager::s_pCurBufferPointer)[2] = (float)(T)DataRead<T>() * posScale; else ((float*)VertexManager::s_pCurBufferPointer)[2] = 1.0f; LOG_VTX(); VertexManager::s_pCurBufferPointer += 12; } -void LOADERDECL Pos_ReadDirect_Short() -{ - ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s16)DataReadU16() * posScale; - ((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(s16)DataReadU16() * posScale; - if (pVtxAttr->PosElements) - ((float*)VertexManager::s_pCurBufferPointer)[2] = (float)(s16)DataReadU16() * posScale; -
else - ((float*)VertexManager::s_pCurBufferPointer)[2] = 1.0f; - LOG_VTX(); - VertexManager::s_pCurBufferPointer += 12; -} +void LOADERDECL Pos_ReadDirect_UByte3() { Pos_ReadDirect<u8, true>(); } +void LOADERDECL Pos_ReadDirect_Byte3() { Pos_ReadDirect<s8, true>(); } +void LOADERDECL Pos_ReadDirect_UShort3() { Pos_ReadDirect<u16, true>(); } +void LOADERDECL Pos_ReadDirect_Short3() { Pos_ReadDirect<s16, true>(); } -void LOADERDECL Pos_ReadDirect_Float() +void LOADERDECL Pos_ReadDirect_UByte2() { Pos_ReadDirect<u8, false>(); } +void LOADERDECL Pos_ReadDirect_Byte2() { Pos_ReadDirect<s8, false>(); } +void LOADERDECL Pos_ReadDirect_UShort2() { Pos_ReadDirect<u16, false>(); } +void LOADERDECL Pos_ReadDirect_Short2() { Pos_ReadDirect<s16, false>(); } + +void LOADERDECL Pos_ReadDirect_Float3() { // No need to use floating point here. ((u32 *)VertexManager::s_pCurBufferPointer)[0] = DataReadU32(); ((u32 *)VertexManager::s_pCurBufferPointer)[1] = DataReadU32(); - if (pVtxAttr->PosElements) - ((u32 *)VertexManager::s_pCurBufferPointer)[2] = DataReadU32(); - else - ((float*)VertexManager::s_pCurBufferPointer)[2] = 1.0f; + ((u32 *)VertexManager::s_pCurBufferPointer)[2] = DataReadU32(); LOG_VTX(); VertexManager::s_pCurBufferPointer += 12; } -template <class T> +void LOADERDECL Pos_ReadDirect_Float2() +{ + // No need to use floating point here.
+ ((u32 *)VertexManager::s_pCurBufferPointer)[0] = DataReadU32(); + ((u32 *)VertexManager::s_pCurBufferPointer)[1] = DataReadU32(); + ((u32 *)VertexManager::s_pCurBufferPointer)[2] = 0x3f800000; // 1.0f + LOG_VTX(); + VertexManager::s_pCurBufferPointer += 12; +} + + +template <class T, bool three> inline void Pos_ReadIndex_Byte(int Index) { const u8* pData = cached_arraybases[ARRAY_POSITION] + ((u32)Index * arraystrides[ARRAY_POSITION]); ((float*)VertexManager::s_pCurBufferPointer)[0] = ((float)(T)(pData[0])) * posScale; ((float*)VertexManager::s_pCurBufferPointer)[1] = ((float)(T)(pData[1])) * posScale; - if (pVtxAttr->PosElements) + if (three) ((float*)VertexManager::s_pCurBufferPointer)[2] = ((float)(T)(pData[2])) * posScale; else ((float*)VertexManager::s_pCurBufferPointer)[2] = 1.0f; @@ -151,13 +135,13 @@ inline void Pos_ReadIndex_Byte(int Index) VertexManager::s_pCurBufferPointer += 12; } -template <class T> +template <class T, bool three> inline void Pos_ReadIndex_Short(int Index) { const u16* pData = (const u16 *)(cached_arraybases[ARRAY_POSITION] + ((u32)Index * arraystrides[ARRAY_POSITION])); ((float*)VertexManager::s_pCurBufferPointer)[0] = ((float)(T)Common::swap16(pData[0])) * posScale; ((float*)VertexManager::s_pCurBufferPointer)[1] = ((float)(T)Common::swap16(pData[1])) * posScale; - if (pVtxAttr->PosElements) + if (three) ((float*)VertexManager::s_pCurBufferPointer)[2] = ((float)(T)Common::swap16(pData[2])) * posScale; else ((float*)VertexManager::s_pCurBufferPointer)[2] = 1.0f; @@ -165,12 +149,13 @@ inline void Pos_ReadIndex_Short(int Index) VertexManager::s_pCurBufferPointer += 12; } +template <bool three> inline void Pos_ReadIndex_Float(int Index) { const u32* pData = (const u32 *)(cached_arraybases[ARRAY_POSITION] + (Index * arraystrides[ARRAY_POSITION])); ((u32*)VertexManager::s_pCurBufferPointer)[0] = Common::swap32(pData[0]); ((u32*)VertexManager::s_pCurBufferPointer)[1] = Common::swap32(pData[1]); - if (pVtxAttr->PosElements) + if (three) ((u32*)VertexManager::s_pCurBufferPointer)[2] =
Common::swap32(pData[2]); else ((float*)VertexManager::s_pCurBufferPointer)[2] = 1.0f; @@ -181,65 +166,29 @@ inline void Pos_ReadIndex_Float(int Index) // ============================================================================== // Index 8 // ============================================================================== -void LOADERDECL Pos_ReadIndex8_UByte() -{ - u8 Index = DataReadU8(); - Pos_ReadIndex_Byte<u8>(Index); -} - -void LOADERDECL Pos_ReadIndex8_Byte() -{ - u8 Index = DataReadU8(); - Pos_ReadIndex_Byte<s8>(Index); -} - -void LOADERDECL Pos_ReadIndex8_UShort() -{ - u8 Index = DataReadU8(); - Pos_ReadIndex_Short<u16>(Index); -} - -void LOADERDECL Pos_ReadIndex8_Short() -{ - u8 Index = DataReadU8(); - Pos_ReadIndex_Short<s16>(Index); -} - -void LOADERDECL Pos_ReadIndex8_Float() -{ - u8 Index = DataReadU8(); - Pos_ReadIndex_Float(Index); -} +void LOADERDECL Pos_ReadIndex8_UByte3() {Pos_ReadIndex_Byte<u8, true> (DataReadU8());} +void LOADERDECL Pos_ReadIndex8_Byte3() {Pos_ReadIndex_Byte<s8, true> (DataReadU8());} +void LOADERDECL Pos_ReadIndex8_UShort3() {Pos_ReadIndex_Short<u16, true> (DataReadU8());} +void LOADERDECL Pos_ReadIndex8_Short3() {Pos_ReadIndex_Short<s16, true> (DataReadU8());} +void LOADERDECL Pos_ReadIndex8_Float3() {Pos_ReadIndex_Float<true> (DataReadU8());} +void LOADERDECL Pos_ReadIndex8_UByte2() {Pos_ReadIndex_Byte<u8, false>(DataReadU8());} +void LOADERDECL Pos_ReadIndex8_Byte2() {Pos_ReadIndex_Byte<s8, false>(DataReadU8());} +void LOADERDECL Pos_ReadIndex8_UShort2() {Pos_ReadIndex_Short<u16, false>(DataReadU8());} +void LOADERDECL Pos_ReadIndex8_Short2() {Pos_ReadIndex_Short<s16, false>(DataReadU8());} +void LOADERDECL Pos_ReadIndex8_Float2() {Pos_ReadIndex_Float<false> (DataReadU8());} // ============================================================================== // Index 16 // ============================================================================== - -void LOADERDECL Pos_ReadIndex16_UByte(){ - u16 Index = DataReadU16(); - Pos_ReadIndex_Byte<u8>(Index); -} - -void LOADERDECL Pos_ReadIndex16_Byte(){ - u16 Index = DataReadU16(); -
Pos_ReadIndex_Byte<s8>(Index); -} - -void LOADERDECL Pos_ReadIndex16_UShort(){ - u16 Index = DataReadU16(); - Pos_ReadIndex_Short<u16>(Index); -} - -void LOADERDECL Pos_ReadIndex16_Short() -{ - u16 Index = DataReadU16(); - Pos_ReadIndex_Short<s16>(Index); -} - -void LOADERDECL Pos_ReadIndex16_Float() -{ - u16 Index = DataReadU16(); - Pos_ReadIndex_Float(Index); -} +void LOADERDECL Pos_ReadIndex16_UByte3() {Pos_ReadIndex_Byte<u8, true> (DataReadU16());} +void LOADERDECL Pos_ReadIndex16_Byte3() {Pos_ReadIndex_Byte<s8, true> (DataReadU16());} +void LOADERDECL Pos_ReadIndex16_UShort3() {Pos_ReadIndex_Short<u16, true> (DataReadU16());} +void LOADERDECL Pos_ReadIndex16_Short3() {Pos_ReadIndex_Short<s16, true> (DataReadU16());} +void LOADERDECL Pos_ReadIndex16_Float3() {Pos_ReadIndex_Float<true> (DataReadU16());} +void LOADERDECL Pos_ReadIndex16_UByte2() {Pos_ReadIndex_Byte<u8, false>(DataReadU16());} +void LOADERDECL Pos_ReadIndex16_Byte2() {Pos_ReadIndex_Byte<s8, false>(DataReadU16());} +void LOADERDECL Pos_ReadIndex16_UShort2() {Pos_ReadIndex_Short<u16, false>(DataReadU16());} +void LOADERDECL Pos_ReadIndex16_Short2() {Pos_ReadIndex_Short<s16, false>(DataReadU16());} +void LOADERDECL Pos_ReadIndex16_Float2() {Pos_ReadIndex_Float<false> (DataReadU16());} #endif diff --git a/Source/Core/VideoCommon/Src/VertexLoader_Position.h b/Source/Core/VideoCommon/Src/VertexLoader_Position.h index 4874e25ae3..cfb612de42 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader_Position.h +++ b/Source/Core/VideoCommon/Src/VertexLoader_Position.h @@ -18,22 +18,40 @@ #ifndef VERTEXLOADER_POSITION_H #define VERTEXLOADER_POSITION_H -void LOADERDECL Pos_ReadDirect_UByte(); -void LOADERDECL Pos_ReadDirect_Byte(); -void LOADERDECL Pos_ReadDirect_UShort(); -void LOADERDECL Pos_ReadDirect_Short(); -void LOADERDECL Pos_ReadDirect_Float(); +void LOADERDECL Pos_ReadDirect_UByte3(); +void LOADERDECL Pos_ReadDirect_Byte3(); +void LOADERDECL Pos_ReadDirect_UShort3(); +void LOADERDECL Pos_ReadDirect_Short3(); +void LOADERDECL Pos_ReadDirect_Float3(); -void LOADERDECL Pos_ReadIndex8_UByte(); -void LOADERDECL
Pos_ReadIndex8_Byte(); -void LOADERDECL Pos_ReadIndex8_UShort(); -void LOADERDECL Pos_ReadIndex8_Short(); -void LOADERDECL Pos_ReadIndex8_Float(); +void LOADERDECL Pos_ReadIndex8_UByte3(); +void LOADERDECL Pos_ReadIndex8_Byte3(); +void LOADERDECL Pos_ReadIndex8_UShort3(); +void LOADERDECL Pos_ReadIndex8_Short3(); +void LOADERDECL Pos_ReadIndex8_Float3(); -void LOADERDECL Pos_ReadIndex16_UByte(); -void LOADERDECL Pos_ReadIndex16_Byte(); -void LOADERDECL Pos_ReadIndex16_UShort(); -void LOADERDECL Pos_ReadIndex16_Short(); -void LOADERDECL Pos_ReadIndex16_Float(); +void LOADERDECL Pos_ReadIndex16_UByte3(); +void LOADERDECL Pos_ReadIndex16_Byte3(); +void LOADERDECL Pos_ReadIndex16_UShort3(); +void LOADERDECL Pos_ReadIndex16_Short3(); +void LOADERDECL Pos_ReadIndex16_Float3(); + +void LOADERDECL Pos_ReadDirect_UByte2(); +void LOADERDECL Pos_ReadDirect_Byte2(); +void LOADERDECL Pos_ReadDirect_UShort2(); +void LOADERDECL Pos_ReadDirect_Short2(); +void LOADERDECL Pos_ReadDirect_Float2(); + +void LOADERDECL Pos_ReadIndex8_UByte2(); +void LOADERDECL Pos_ReadIndex8_Byte2(); +void LOADERDECL Pos_ReadIndex8_UShort2(); +void LOADERDECL Pos_ReadIndex8_Short2(); +void LOADERDECL Pos_ReadIndex8_Float2(); + +void LOADERDECL Pos_ReadIndex16_UByte2(); +void LOADERDECL Pos_ReadIndex16_Byte2(); +void LOADERDECL Pos_ReadIndex16_UShort2(); +void LOADERDECL Pos_ReadIndex16_Short2(); +void LOADERDECL Pos_ReadIndex16_Float2(); #endif