vertexjit: Only save extra regs (XMM8/XMM9) on x64.

2024-11-23 13:30:02 +00:00 · 2021-02-01 07:06:18 -08:00 · 2021-02-01 07:06:18 -08:00 · c1fa4958d9
commit c1fa4958d9
parent 30b6f1f865
1 changed files with 12 additions and 8 deletions
--- a/GPU/Common/VertexDecoderX86.cpp
+++ b/GPU/Common/VertexDecoderX86.cpp
@@ -53,7 +53,7 @@ alignas(16) static const float by16384[4] = {
 	1.0f / 16384.0f, 1.0f / 16384.0f, 1.0f / 16384.0f, 1.0f / 16384.0f,
 };

-#ifdef _M_X64
+#if PPSSPP_ARCH(AMD64)
 #ifdef _WIN32
 static const X64Reg tempReg1 = RAX;
 static const X64Reg tempReg2 = R9;
@@ -197,8 +197,10 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
 	MOVUPS(MDisp(ESP, 16), XMM5);
 	MOVUPS(MDisp(ESP, 32), XMM6);
 	MOVUPS(MDisp(ESP, 48), XMM7);
+#if PPSSPP_ARCH(AMD64)
 	MOVUPS(MDisp(ESP, 64), XMM8);
 	MOVUPS(MDisp(ESP, 80), XMM9);
+#endif

 	bool prescaleStep = false;
 	// Look for prescaled texcoord steps
@@ -275,11 +277,13 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
 	MOVUPS(XMM5, MDisp(ESP, 16));
 	MOVUPS(XMM6, MDisp(ESP, 32));
 	MOVUPS(XMM7, MDisp(ESP, 48));
+#if PPSSPP_ARCH(AMD64)
 	MOVUPS(XMM8, MDisp(ESP, 64));
 	MOVUPS(XMM9, MDisp(ESP, 80));
+#endif
 	ADD(PTRBITS, R(ESP), Imm8(STACK_FIXED_ALLOC));

-#ifdef _M_IX86
+#if PPSSPP_ARCH(X86)
 	// Restore register values
 	POP(EBP);
 	POP(EBX);
@@ -466,7 +470,7 @@ void VertexDecoderJitCache::Jit_WeightsFloat() {
 void VertexDecoderJitCache::Jit_WeightsU8Skin() {
 	MOV(PTRBITS, R(tempReg2), ImmPtr(&bones));

-#ifdef _M_X64
+#if PPSSPP_ARCH(AMD64)
 	if (dec_->nweights > 4) {
 		// This reads 8 bytes, we split the top 4 so we can expand each set of 4.
 		MOVQ_xmm(XMM8, MDisp(srcReg, dec_->weightoff));
@@ -518,7 +522,7 @@ void VertexDecoderJitCache::Jit_WeightsU8Skin() {

 	for (int j = 0; j < dec_->nweights; j++) {
 		X64Reg weight = XMM1;
-#ifdef _M_X64
+#if PPSSPP_ARCH(AMD64)
 		X64Reg weightSrc = j < 4 ? XMM8 : XMM9;
 		if (j == 3 || j == dec_->nweights - 1) {
 			// In the previous iteration, we already spread this value to all lanes.
@@ -576,7 +580,7 @@ void VertexDecoderJitCache::Jit_WeightsU8Skin() {
 void VertexDecoderJitCache::Jit_WeightsU16Skin() {
 	MOV(PTRBITS, R(tempReg2), ImmPtr(&bones));

-#ifdef _M_X64
+#if PPSSPP_ARCH(AMD64)
 	if (dec_->nweights > 6) {
 		// Since this is probably not aligned, two MOVQs are better than one MOVDQU.
 		MOVQ_xmm(XMM8, MDisp(srcReg, dec_->weightoff));
@@ -632,7 +636,7 @@ void VertexDecoderJitCache::Jit_WeightsU16Skin() {

 	for (int j = 0; j < dec_->nweights; j++) {
 		X64Reg weight = XMM1;
-#ifdef _M_X64
+#if PPSSPP_ARCH(AMD64)
 		X64Reg weightSrc = j < 4 ? XMM8 : XMM9;
 		if (j == 3 || j == dec_->nweights - 1) {
 			// In the previous iteration, we already spread this value to all lanes.
@@ -730,7 +734,7 @@ void VertexDecoderJitCache::Jit_TcU16ToFloat() {
 }

 void VertexDecoderJitCache::Jit_TcFloat() {
-#ifdef _M_X64
+#if PPSSPP_ARCH(AMD64)
 	MOV(64, R(tempReg1), MDisp(srcReg, dec_->tcoff));
 	MOV(64, MDisp(dstReg, dec_->decFmt.uvoff), R(tempReg1));
 #else
@@ -911,7 +915,7 @@ void VertexDecoderJitCache::Jit_TcU16ThroughToFloat() {
 }

 void VertexDecoderJitCache::Jit_TcFloatThrough() {
-#ifdef _M_X64
+#if PPSSPP_ARCH(AMD64)
 	MOV(64, R(tempReg1), MDisp(srcReg, dec_->tcoff));
 	MOV(64, MDisp(dstReg, dec_->decFmt.uvoff), R(tempReg1));
 #else