mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-02-26 00:45:49 +00:00
Jit the most common of the "ToFloat" texcoord conversions
This commit is contained in:
parent
37e3cf362f
commit
0727df6f0a
@ -51,6 +51,7 @@ static float MEMORY_ALIGNED16(boneMask[4]) = {1.0f, 1.0f, 1.0f, 0.0f};
|
||||
|
||||
|
||||
static const float by128 = 1.0f / 128.0f;
|
||||
static const float by16384 = 1.0f / 16384.0f;
|
||||
static const float by32768 = 1.0f / 32768.0f;
|
||||
|
||||
using namespace ArmGen;
|
||||
|
@ -39,6 +39,10 @@ static const float MEMORY_ALIGNED16( by32768[4] ) = {
|
||||
static const u32 MEMORY_ALIGNED16( threeMasks[4] ) = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0};
|
||||
static const u32 MEMORY_ALIGNED16( aOne[4] ) = {0, 0, 0, 0x3F800000};
|
||||
|
||||
static const float MEMORY_ALIGNED16(by16384[4]) = {
|
||||
1.0f / 16384.0f, 1.0f / 16384.0f, 1.0f / 16384.0f, 1.0f / 16384.0f,
|
||||
};
|
||||
|
||||
#ifdef _M_X64
|
||||
#ifdef _WIN32
|
||||
static const X64Reg tempReg1 = RAX;
|
||||
@ -90,6 +94,8 @@ static const JitLookup jitLookup[] = {
|
||||
|
||||
{&VertexDecoder::Step_TcU8, &VertexDecoderJitCache::Jit_TcU8},
|
||||
{&VertexDecoder::Step_TcU16, &VertexDecoderJitCache::Jit_TcU16},
|
||||
{&VertexDecoder::Step_TcU8ToFloat, &VertexDecoderJitCache::Jit_TcU8ToFloat},
|
||||
{&VertexDecoder::Step_TcU16ToFloat, &VertexDecoderJitCache::Jit_TcU16ToFloat},
|
||||
{&VertexDecoder::Step_TcFloat, &VertexDecoderJitCache::Jit_TcFloat},
|
||||
{&VertexDecoder::Step_TcU16Double, &VertexDecoderJitCache::Jit_TcU16Double},
|
||||
|
||||
@ -98,6 +104,7 @@ static const JitLookup jitLookup[] = {
|
||||
{&VertexDecoder::Step_TcFloatPrescale, &VertexDecoderJitCache::Jit_TcFloatPrescale},
|
||||
|
||||
{&VertexDecoder::Step_TcU16Through, &VertexDecoderJitCache::Jit_TcU16Through},
|
||||
{&VertexDecoder::Step_TcU16ThroughToFloat, &VertexDecoderJitCache::Jit_TcU16ThroughToFloat},
|
||||
{&VertexDecoder::Step_TcFloatThrough, &VertexDecoderJitCache::Jit_TcFloatThrough},
|
||||
{&VertexDecoder::Step_TcU16ThroughDouble, &VertexDecoderJitCache::Jit_TcU16ThroughDouble},
|
||||
|
||||
@ -464,6 +471,26 @@ void VertexDecoderJitCache::Jit_TcU16() {
|
||||
MOV(32, MDisp(dstReg, dec_->decFmt.uvoff), R(tempReg1));
|
||||
}
|
||||
|
||||
void VertexDecoderJitCache::Jit_TcU8ToFloat() {
|
||||
// TODO: The first five instructions could be done in 1 or 2 in SSE4
|
||||
MOVZX(32, 8, tempReg1, MDisp(srcReg, dec_->tcoff));
|
||||
MOVZX(32, 8, tempReg2, MDisp(srcReg, dec_->tcoff + 1));
|
||||
CVTSI2SS(fpScratchReg, R(tempReg1));
|
||||
CVTSI2SS(fpScratchReg2, R(tempReg2));
|
||||
UNPCKLPS(fpScratchReg, R(fpScratchReg2));
|
||||
MULPS(fpScratchReg, M(&by128));
|
||||
MOVQ_xmm(MDisp(dstReg, dec_->decFmt.uvoff), fpScratchReg);
|
||||
}
|
||||
|
||||
void VertexDecoderJitCache::Jit_TcU16ToFloat() {
|
||||
PXOR(fpScratchReg2, R(fpScratchReg2));
|
||||
MOVD_xmm(fpScratchReg, MDisp(srcReg, dec_->tcoff));
|
||||
PUNPCKLWD(fpScratchReg, R(fpScratchReg2));
|
||||
CVTDQ2PS(fpScratchReg, R(fpScratchReg));
|
||||
MULPS(fpScratchReg, M(&by32768));
|
||||
MOVQ_xmm(MDisp(dstReg, dec_->decFmt.uvoff), fpScratchReg);
|
||||
}
|
||||
|
||||
void VertexDecoderJitCache::Jit_TcU16Double() {
|
||||
MOVZX(32, 16, tempReg1, MDisp(srcReg, dec_->tcoff));
|
||||
MOVZX(32, 16, tempReg2, MDisp(srcReg, dec_->tcoff + 2));
|
||||
@ -525,6 +552,14 @@ void VertexDecoderJitCache::Jit_TcU16Through() {
|
||||
MOV(32, MDisp(dstReg, dec_->decFmt.uvoff), R(tempReg1));
|
||||
}
|
||||
|
||||
void VertexDecoderJitCache::Jit_TcU16ThroughToFloat() {
|
||||
PXOR(fpScratchReg2, R(fpScratchReg2));
|
||||
MOVD_xmm(fpScratchReg, MDisp(srcReg, dec_->tcoff));
|
||||
PUNPCKLWD(fpScratchReg, R(fpScratchReg2));
|
||||
CVTDQ2PS(fpScratchReg, R(fpScratchReg));
|
||||
MOVQ_xmm(MDisp(dstReg, dec_->decFmt.uvoff), fpScratchReg);
|
||||
}
|
||||
|
||||
void VertexDecoderJitCache::Jit_TcU16ThroughDouble() {
|
||||
MOVZX(32, 16, tempReg1, MDisp(srcReg, dec_->tcoff));
|
||||
MOVZX(32, 16, tempReg2, MDisp(srcReg, dec_->tcoff + 2));
|
||||
|
Loading…
x
Reference in New Issue
Block a user