vertexjit: Only save extra regs on x64.

This commit is contained in:
Unknown W. Brackets 2021-02-01 07:06:18 -08:00
parent 30b6f1f865
commit c1fa4958d9

View File

@@ -53,7 +53,7 @@ alignas(16) static const float by16384[4] = {
1.0f / 16384.0f, 1.0f / 16384.0f, 1.0f / 16384.0f, 1.0f / 16384.0f,
};
-#ifdef _M_X64
+#if PPSSPP_ARCH(AMD64)
#ifdef _WIN32
static const X64Reg tempReg1 = RAX;
static const X64Reg tempReg2 = R9;
@@ -197,8 +197,10 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
MOVUPS(MDisp(ESP, 16), XMM5);
MOVUPS(MDisp(ESP, 32), XMM6);
MOVUPS(MDisp(ESP, 48), XMM7);
+#if PPSSPP_ARCH(AMD64)
MOVUPS(MDisp(ESP, 64), XMM8);
MOVUPS(MDisp(ESP, 80), XMM9);
+#endif
bool prescaleStep = false;
// Look for prescaled texcoord steps
@@ -275,11 +277,13 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
MOVUPS(XMM5, MDisp(ESP, 16));
MOVUPS(XMM6, MDisp(ESP, 32));
MOVUPS(XMM7, MDisp(ESP, 48));
+#if PPSSPP_ARCH(AMD64)
MOVUPS(XMM8, MDisp(ESP, 64));
MOVUPS(XMM9, MDisp(ESP, 80));
+#endif
ADD(PTRBITS, R(ESP), Imm8(STACK_FIXED_ALLOC));
-#ifdef _M_IX86
+#if PPSSPP_ARCH(X86)
// Restore register values
POP(EBP);
POP(EBX);
@@ -466,7 +470,7 @@ void VertexDecoderJitCache::Jit_WeightsFloat() {
void VertexDecoderJitCache::Jit_WeightsU8Skin() {
MOV(PTRBITS, R(tempReg2), ImmPtr(&bones));
-#ifdef _M_X64
+#if PPSSPP_ARCH(AMD64)
if (dec_->nweights > 4) {
// This reads 8 bytes, we split the top 4 so we can expand each set of 4.
MOVQ_xmm(XMM8, MDisp(srcReg, dec_->weightoff));
@@ -518,7 +522,7 @@ void VertexDecoderJitCache::Jit_WeightsU8Skin() {
for (int j = 0; j < dec_->nweights; j++) {
X64Reg weight = XMM1;
-#ifdef _M_X64
+#if PPSSPP_ARCH(AMD64)
X64Reg weightSrc = j < 4 ? XMM8 : XMM9;
if (j == 3 || j == dec_->nweights - 1) {
// In the previous iteration, we already spread this value to all lanes.
@@ -576,7 +580,7 @@ void VertexDecoderJitCache::Jit_WeightsU8Skin() {
void VertexDecoderJitCache::Jit_WeightsU16Skin() {
MOV(PTRBITS, R(tempReg2), ImmPtr(&bones));
-#ifdef _M_X64
+#if PPSSPP_ARCH(AMD64)
if (dec_->nweights > 6) {
// Since this is probably not aligned, two MOVQs are better than one MOVDQU.
MOVQ_xmm(XMM8, MDisp(srcReg, dec_->weightoff));
@@ -632,7 +636,7 @@ void VertexDecoderJitCache::Jit_WeightsU16Skin() {
for (int j = 0; j < dec_->nweights; j++) {
X64Reg weight = XMM1;
-#ifdef _M_X64
+#if PPSSPP_ARCH(AMD64)
X64Reg weightSrc = j < 4 ? XMM8 : XMM9;
if (j == 3 || j == dec_->nweights - 1) {
// In the previous iteration, we already spread this value to all lanes.
@@ -730,7 +734,7 @@ void VertexDecoderJitCache::Jit_TcU16ToFloat() {
}
void VertexDecoderJitCache::Jit_TcFloat() {
-#ifdef _M_X64
+#if PPSSPP_ARCH(AMD64)
MOV(64, R(tempReg1), MDisp(srcReg, dec_->tcoff));
MOV(64, MDisp(dstReg, dec_->decFmt.uvoff), R(tempReg1));
#else
@@ -911,7 +915,7 @@ void VertexDecoderJitCache::Jit_TcU16ThroughToFloat() {
}
void VertexDecoderJitCache::Jit_TcFloatThrough() {
-#ifdef _M_X64
+#if PPSSPP_ARCH(AMD64)
MOV(64, R(tempReg1), MDisp(srcReg, dec_->tcoff));
MOV(64, MDisp(dstReg, dec_->decFmt.uvoff), R(tempReg1));
#else