Mirror of https://github.com/hrydgard/ppsspp.git
vertexjit: Only save extra regs on x64.
commit c1fa4958d9
parent 30b6f1f865
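This change moves the vertex decoder jit's architecture checks from compiler-specific macros (_M_X64, _M_IX86) to the PPSSPP_ARCH() style, and guards the XMM8/XMM9 save/restore so 32-bit builds no longer touch registers that are only encodable in 64-bit mode. For readers unfamiliar with the macro style: #if PPSSPP_ARCH(AMD64) works in the preprocessor because the function-like macro token-pastes its argument onto a per-platform flag. A minimal sketch of that idea, with assumed detection logic (the real definitions live elsewhere in PPSSPP and may differ):

#if defined(_M_X64) || defined(__amd64__)
#define PPSSPP_ARCH_AMD64 1
#elif defined(_M_IX86) || defined(__i386__)
#define PPSSPP_ARCH_X86 1
#endif

// PPSSPP_ARCH(AMD64) expands to (PPSSPP_ARCH_AMD64); an undefined macro
// evaluates to 0 inside #if, so the check is safe on every platform.
#define PPSSPP_ARCH(PPSSPP_FEATURE) (PPSSPP_ARCH_##PPSSPP_FEATURE)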
@@ -53,7 +53,7 @@ alignas(16) static const float by16384[4] = {
 	1.0f / 16384.0f, 1.0f / 16384.0f, 1.0f / 16384.0f, 1.0f / 16384.0f,
 };
 
-#ifdef _M_X64
+#if PPSSPP_ARCH(AMD64)
 #ifdef _WIN32
 static const X64Reg tempReg1 = RAX;
 static const X64Reg tempReg2 = R9;
@@ -197,8 +197,10 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
 	MOVUPS(MDisp(ESP, 16), XMM5);
 	MOVUPS(MDisp(ESP, 32), XMM6);
 	MOVUPS(MDisp(ESP, 48), XMM7);
+#if PPSSPP_ARCH(AMD64)
 	MOVUPS(MDisp(ESP, 64), XMM8);
 	MOVUPS(MDisp(ESP, 80), XMM9);
+#endif
 
 	bool prescaleStep = false;
 	// Look for prescaled texcoord steps
@@ -275,11 +277,13 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
 	MOVUPS(XMM5, MDisp(ESP, 16));
 	MOVUPS(XMM6, MDisp(ESP, 32));
 	MOVUPS(XMM7, MDisp(ESP, 48));
+#if PPSSPP_ARCH(AMD64)
 	MOVUPS(XMM8, MDisp(ESP, 64));
 	MOVUPS(XMM9, MDisp(ESP, 80));
+#endif
 	ADD(PTRBITS, R(ESP), Imm8(STACK_FIXED_ALLOC));
 
-#ifdef _M_IX86
+#if PPSSPP_ARCH(X86)
 	// Restore register values
 	POP(EBP);
 	POP(EBX);
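Reviewer note: the epilogue guard mirrors the prologue guard exactly, so every MOVUPS store keeps a matching load under the same condition. As a quick size check, the highest x64 slot is MDisp(ESP, 80) and each MOVUPS moves 16 bytes, so these saves reach offset 80 + 16 = 96 of the STACK_FIXED_ALLOC area on x64, versus 48 + 16 = 64 for the stores that remain unconditional (any slots below offset 16 fall outside this hunk).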
@@ -466,7 +470,7 @@ void VertexDecoderJitCache::Jit_WeightsFloat() {
 void VertexDecoderJitCache::Jit_WeightsU8Skin() {
 	MOV(PTRBITS, R(tempReg2), ImmPtr(&bones));
 
-#ifdef _M_X64
+#if PPSSPP_ARCH(AMD64)
 	if (dec_->nweights > 4) {
 		// This reads 8 bytes, we split the top 4 so we can expand each set of 4.
 		MOVQ_xmm(XMM8, MDisp(srcReg, dec_->weightoff));
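The comment in this hunk ("reads 8 bytes, we split the top 4") describes a widening pattern: one MOVQ pulls in all eight u8 weights, which are then unpacked and converted four at a time. A standalone SSE2 illustration of the same idea, with a made-up helper name (the unpack/convert steps are implied by, not shown in, the hunk):

#include <emmintrin.h>
#include <cstdint>

// Widen 8 u8 skin weights into two vectors of 4 floats each.
static void ExpandWeightsU8(const uint8_t *w, __m128 &lo4, __m128 &hi4) {
	__m128i v8 = _mm_loadl_epi64((const __m128i *)w);      // one 8-byte read (MOVQ)
	__m128i zero = _mm_setzero_si128();
	__m128i v16 = _mm_unpacklo_epi8(v8, zero);             // 8 x u16
	lo4 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v16, zero));  // weights 0-3 as floats
	hi4 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v16, zero));  // weights 4-7 as floats
}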
@@ -518,7 +522,7 @@ void VertexDecoderJitCache::Jit_WeightsU8Skin() {
 
 	for (int j = 0; j < dec_->nweights; j++) {
 		X64Reg weight = XMM1;
-#ifdef _M_X64
+#if PPSSPP_ARCH(AMD64)
 		X64Reg weightSrc = j < 4 ? XMM8 : XMM9;
 		if (j == 3 || j == dec_->nweights - 1) {
 			// In the previous iteration, we already spread this value to all lanes.
@@ -576,7 +580,7 @@ void VertexDecoderJitCache::Jit_WeightsU8Skin() {
 void VertexDecoderJitCache::Jit_WeightsU16Skin() {
 	MOV(PTRBITS, R(tempReg2), ImmPtr(&bones));
 
-#ifdef _M_X64
+#if PPSSPP_ARCH(AMD64)
 	if (dec_->nweights > 6) {
 		// Since this is probably not aligned, two MOVQs are better than one MOVDQU.
 		MOVQ_xmm(XMM8, MDisp(srcReg, dec_->weightoff));
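The alignment comment is the interesting part here: vertex data is packed, so srcReg + dec_->weightoff is frequently not 16-byte aligned, and on the CPUs this jit targets a pair of 8-byte MOVQs was typically cheaper than one unaligned 16-byte MOVDQU. An intrinsics illustration of the split load (hypothetical helper, not PPSSPP code):

#include <emmintrin.h>
#include <cstdint>

// Load 8 u16 weights (16 possibly-unaligned bytes) as two MOVQs.
static void LoadWeightsU16(const uint16_t *w, __m128i &lo, __m128i &hi) {
	lo = _mm_loadl_epi64((const __m128i *)(w + 0));  // MOVQ: weights 0-3
	hi = _mm_loadl_epi64((const __m128i *)(w + 4));  // MOVQ: weights 4-7
}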
@@ -632,7 +636,7 @@ void VertexDecoderJitCache::Jit_WeightsU16Skin() {
 
 	for (int j = 0; j < dec_->nweights; j++) {
 		X64Reg weight = XMM1;
-#ifdef _M_X64
+#if PPSSPP_ARCH(AMD64)
 		X64Reg weightSrc = j < 4 ? XMM8 : XMM9;
 		if (j == 3 || j == dec_->nweights - 1) {
 			// In the previous iteration, we already spread this value to all lanes.
@@ -730,7 +734,7 @@ void VertexDecoderJitCache::Jit_TcU16ToFloat() {
 }
 
 void VertexDecoderJitCache::Jit_TcFloat() {
-#ifdef _M_X64
+#if PPSSPP_ARCH(AMD64)
 	MOV(64, R(tempReg1), MDisp(srcReg, dec_->tcoff));
 	MOV(64, MDisp(dstReg, dec_->decFmt.uvoff), R(tempReg1));
 #else
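On AMD64, Jit_TcFloat copies the two 32-bit texcoord floats with a single 64-bit GPR move rather than two 32-bit ones; the #else branch is cut off by the hunk. Equivalent C++ as an illustration only (hypothetical function, not from the patch):

#include <cstdint>
#include <cstring>

// Copy a float2 (u, v) as one 8-byte integer move, like the jitted pair:
// MOV tempReg1, [srcReg + tcoff]; MOV [dstReg + uvoff], tempReg1.
static void CopyTexcoord(const void *src, void *dst) {
	uint64_t uv;
	memcpy(&uv, src, sizeof(uv));  // one 8-byte load
	memcpy(dst, &uv, sizeof(uv));  // one 8-byte store
}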
@@ -911,7 +915,7 @@ void VertexDecoderJitCache::Jit_TcU16ThroughToFloat() {
 }
 
 void VertexDecoderJitCache::Jit_TcFloatThrough() {
-#ifdef _M_X64
+#if PPSSPP_ARCH(AMD64)
 	MOV(64, R(tempReg1), MDisp(srcReg, dec_->tcoff));
 	MOV(64, MDisp(dstReg, dec_->decFmt.uvoff), R(tempReg1));
 #else