mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-02-26 00:45:49 +00:00
vertexjit: Try to avoid a few more VFP switches.
This commit is contained in:
parent
58fe022ecd
commit
4c48031724
@ -1117,8 +1117,8 @@ void VertexDecoderJitCache::Jit_PosFloat() {
|
||||
void VertexDecoderJitCache::Jit_NormalS8Skin() {
|
||||
if (NEONSkinning) {
|
||||
ADD(scratchReg, srcReg, dec_->nrmoff);
|
||||
MOVI2F(S15, 1.0f/128.0f, scratchReg2);
|
||||
VLD1_lane(I_32, neonScratchReg, scratchReg, 0, false);
|
||||
MOVI2F(S15, 1.0f/128.0f, scratchReg);
|
||||
VMOVL(I_8 | I_SIGNED, neonScratchRegQ, neonScratchReg); // Widen to 16-bit
|
||||
VMOVL(I_16 | I_SIGNED, neonScratchRegQ, neonScratchReg); // Widen to 32-bit
|
||||
VCVT(F_32 | I_SIGNED, neonScratchRegQ, neonScratchRegQ);
|
||||
@ -1144,8 +1144,8 @@ void VertexDecoderJitCache::Jit_NormalS8Skin() {
|
||||
void VertexDecoderJitCache::Jit_NormalS16Skin() {
|
||||
if (NEONSkinning) {
|
||||
ADD(scratchReg, srcReg, dec_->nrmoff);
|
||||
MOVI2F(S15, 1.0f/32768, scratchReg2);
|
||||
VLD1(I_32, neonScratchReg, scratchReg, 1, ALIGN_NONE);
|
||||
MOVI2F(S15, 1.0f/32768, scratchReg);
|
||||
VMOVL(I_16 | I_SIGNED, neonScratchRegQ, neonScratchReg); // Widen to 32-bit
|
||||
VCVT(F_32 | I_SIGNED, neonScratchRegQ, neonScratchRegQ);
|
||||
VMUL_scalar(F_32, srcNEON, neonScratchReg, QScalar(Q3, 3)); // S15
|
||||
@ -1219,8 +1219,8 @@ void VertexDecoderJitCache::Jit_WriteMatrixMul(int outOff, bool pos) {
|
||||
void VertexDecoderJitCache::Jit_PosS8Skin() {
|
||||
if (NEONSkinning) {
|
||||
ADD(scratchReg, srcReg, dec_->posoff);
|
||||
MOVI2F(S15, 1.0f/128.0f, scratchReg2);
|
||||
VLD1_lane(I_32, neonScratchReg, scratchReg, 0, false);
|
||||
MOVI2F(S15, 1.0f/128.0f, scratchReg);
|
||||
VMOVL(I_8 | I_SIGNED, neonScratchRegQ, neonScratchReg); // Widen to 16-bit
|
||||
VMOVL(I_16 | I_SIGNED, neonScratchRegQ, neonScratchReg); // Widen to 32-bit
|
||||
VCVT(F_32 | I_SIGNED, neonScratchRegQ, neonScratchRegQ);
|
||||
@ -1246,8 +1246,8 @@ void VertexDecoderJitCache::Jit_PosS8Skin() {
|
||||
void VertexDecoderJitCache::Jit_PosS16Skin() {
|
||||
if (NEONSkinning) {
|
||||
ADD(scratchReg, srcReg, dec_->posoff);
|
||||
MOVI2F(S15, 1.0f/32768, scratchReg2);
|
||||
VLD1(I_32, neonScratchReg, scratchReg, 1, ALIGN_NONE);
|
||||
MOVI2F(S15, 1.0f/32768, scratchReg);
|
||||
VMOVL(I_16 | I_SIGNED, neonScratchRegQ, neonScratchReg); // Widen to 32-bit
|
||||
VCVT(F_32 | I_SIGNED, neonScratchRegQ, neonScratchRegQ);
|
||||
VMUL_scalar(F_32, srcNEON, neonScratchReg, QScalar(Q3, 3)); // S15
|
||||
|
@ -361,11 +361,7 @@ void VertexDecoderJitCache::Jit_WeightsFloat() {
|
||||
}
|
||||
|
||||
void VertexDecoderJitCache::Jit_WeightsU8Skin() {
|
||||
#ifdef _M_X64
|
||||
MOV(PTRBITS, R(tempReg2), Imm64((uintptr_t)&bones));
|
||||
#else
|
||||
MOV(PTRBITS, R(tempReg2), Imm32((uintptr_t)&bones));
|
||||
#endif
|
||||
MOV(PTRBITS, R(tempReg2), ImmPtr(&bones));
|
||||
for (int j = 0; j < dec_->nweights; j++) {
|
||||
MOVZX(32, 8, tempReg1, MDisp(srcReg, dec_->weightoff + j));
|
||||
CVTSI2SS(XMM1, R(tempReg1));
|
||||
@ -399,11 +395,7 @@ void VertexDecoderJitCache::Jit_WeightsU8Skin() {
|
||||
}
|
||||
|
||||
void VertexDecoderJitCache::Jit_WeightsU16Skin() {
|
||||
#ifdef _M_X64
|
||||
MOV(PTRBITS, R(tempReg2), Imm64((uintptr_t)&bones));
|
||||
#else
|
||||
MOV(PTRBITS, R(tempReg2), Imm32((uintptr_t)&bones));
|
||||
#endif
|
||||
MOV(PTRBITS, R(tempReg2), ImmPtr(&bones));
|
||||
for (int j = 0; j < dec_->nweights; j++) {
|
||||
MOVZX(32, 16, tempReg1, MDisp(srcReg, dec_->weightoff + j * 2));
|
||||
CVTSI2SS(XMM1, R(tempReg1));
|
||||
@ -437,11 +429,7 @@ void VertexDecoderJitCache::Jit_WeightsU16Skin() {
|
||||
}
|
||||
|
||||
void VertexDecoderJitCache::Jit_WeightsFloatSkin() {
|
||||
#ifdef _M_X64
|
||||
MOV(PTRBITS, R(tempReg2), Imm64((uintptr_t)&bones));
|
||||
#else
|
||||
MOV(PTRBITS, R(tempReg2), Imm32((uintptr_t)&bones));
|
||||
#endif
|
||||
MOV(PTRBITS, R(tempReg2), ImmPtr(&bones));
|
||||
for (int j = 0; j < dec_->nweights; j++) {
|
||||
MOVSS(XMM1, MDisp(srcReg, dec_->weightoff + j * 4));
|
||||
SHUFPS(XMM1, R(XMM1), _MM_SHUFFLE(0, 0, 0, 0));
|
||||
|
Loading…
x
Reference in New Issue
Block a user