Merge pull request #17566 from hrydgard/vertex-decoder-color-opt

x86-64 vertex decoder: Keep the vertex-full-alpha variable in a register
This commit is contained in:
Henrik Rydgård 2023-06-12 20:23:31 +02:00 committed by GitHub
commit 17a723e68c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 44 additions and 2 deletions

View File

@ -128,7 +128,7 @@ public:
void GetSize(int level, int *w, int *h) const {
_dbg_assert_(State() == ReplacementState::ACTIVE);
_dbg_assert_(level < levels_.size());
_dbg_assert_((size_t)level < levels_.size());
*w = levels_[level].fullW;
*h = levels_[level].fullH;
}

View File

@ -60,6 +60,7 @@ static const X64Reg tempReg3 = R10;
static const X64Reg srcReg = RCX;
static const X64Reg dstReg = RDX;
static const X64Reg counterReg = R8;
static const X64Reg alphaReg = R11;
#else
static const X64Reg tempReg1 = RAX;
static const X64Reg tempReg2 = R9;
@ -67,6 +68,7 @@ static const X64Reg tempReg3 = R10;
static const X64Reg srcReg = RDI;
static const X64Reg dstReg = RSI;
static const X64Reg counterReg = RDX;
static const X64Reg alphaReg = R11;
#endif
#else
static const X64Reg tempReg1 = EAX;
@ -201,6 +203,13 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
MOVUPS(MDisp(ESP, 80), XMM9);
#endif
// Initialize alpha reg.
#if PPSSPP_ARCH(AMD64)
if (dec.col) {
MOV(32, R(alphaReg), Imm32(1));
}
#endif
bool prescaleStep = false;
// Look for prescaled texcoord steps
for (int i = 0; i < dec.numSteps_; i++) {
@ -243,6 +252,7 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
}
// Keep the scale/offset in a few fp registers if we need it.
// TODO: Read it from an argument pointer instead of gstate_c.uv.
if (prescaleStep) {
MOV(PTRBITS, R(tempReg1), ImmPtr(&gstate_c.uv));
MOVUPS(fpScaleOffsetReg, MatR(tempReg1));
@ -271,6 +281,21 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
SUB(32, R(counterReg), Imm8(1));
J_CC(CC_NZ, loopStart, true);
// Writeback alpha reg
#if PPSSPP_ARCH(AMD64)
if (dec.col) {
CMP(32, R(alphaReg), Imm32(1));
FixupBranch alphaJump = J_CC(CC_E, false);
if (RipAccessible(&gstate_c.vertexFullAlpha)) {
MOV(8, M(&gstate_c.vertexFullAlpha), Imm8(0)); // rip accessible
} else {
MOV(PTRBITS, R(tempReg1), ImmPtr(&gstate_c.vertexFullAlpha));
MOV(8, MatR(tempReg1), Imm8(0)); // rip accessible
}
SetJumpTarget(alphaJump);
}
#endif
MOVUPS(XMM4, MDisp(ESP, 0));
MOVUPS(XMM5, MDisp(ESP, 16));
MOVUPS(XMM6, MDisp(ESP, 32));
@ -930,12 +955,17 @@ void VertexDecoderJitCache::Jit_Color8888() {
CMP(32, R(tempReg1), Imm32(0xFF000000));
FixupBranch skip = J_CC(CC_AE, false);
#if PPSSPP_ARCH(AMD64)
// Would like to use CMOV or SetCC but CMOV doesn't take immediates and SetCC isn't right. So...
XOR(32, R(alphaReg), R(alphaReg));
#else
if (RipAccessible(&gstate_c.vertexFullAlpha)) {
MOV(8, M(&gstate_c.vertexFullAlpha), Imm8(0)); // rip accessible
} else {
MOV(PTRBITS, R(tempReg1), ImmPtr(&gstate_c.vertexFullAlpha));
MOV(8, MatR(tempReg1), Imm8(0));
}
#endif
SetJumpTarget(skip);
}
@ -965,12 +995,16 @@ void VertexDecoderJitCache::Jit_Color4444() {
CMP(32, R(tempReg1), Imm32(0xFF000000));
FixupBranch skip = J_CC(CC_AE, false);
#if PPSSPP_ARCH(AMD64)
XOR(32, R(alphaReg), R(alphaReg));
#else
if (RipAccessible(&gstate_c.vertexFullAlpha)) {
MOV(8, M(&gstate_c.vertexFullAlpha), Imm8(0)); // rip accessible
} else {
MOV(PTRBITS, R(tempReg1), ImmPtr(&gstate_c.vertexFullAlpha));
MOV(8, MatR(tempReg1), Imm8(0));
}
#endif
SetJumpTarget(skip);
}
@ -1042,14 +1076,18 @@ void VertexDecoderJitCache::Jit_Color5551() {
MOV(32, MDisp(dstReg, dec_->decFmt.c0off), R(tempReg2));
// Let's AND to avoid a branch, tempReg1 has alpha only in the top 8 bits.
// Let's AND to avoid a branch, tempReg1 has alpha only in the top 8 bits, and they're all equal.
SHR(32, R(tempReg1), Imm8(24));
#if PPSSPP_ARCH(AMD64)
AND(8, R(alphaReg), R(tempReg1));
#else
if (RipAccessible(&gstate_c.vertexFullAlpha)) {
AND(8, M(&gstate_c.vertexFullAlpha), R(tempReg1)); // rip accessible
} else {
MOV(PTRBITS, R(tempReg3), ImmPtr(&gstate_c.vertexFullAlpha));
AND(8, MatR(tempReg3), R(tempReg1));
}
#endif
}
void VertexDecoderJitCache::Jit_Color8888Morph() {
@ -1258,12 +1296,16 @@ void VertexDecoderJitCache::Jit_WriteMorphColor(int outOff, bool checkAlpha) {
if (checkAlpha) {
CMP(32, R(tempReg1), Imm32(0xFF000000));
FixupBranch skip = J_CC(CC_AE, false);
#if PPSSPP_ARCH(AMD64)
XOR(32, R(alphaReg), R(alphaReg));
#else
if (RipAccessible(&gstate_c.vertexFullAlpha)) {
MOV(8, M(&gstate_c.vertexFullAlpha), Imm8(0)); // rip accessible
} else {
MOV(PTRBITS, R(tempReg2), ImmPtr(&gstate_c.vertexFullAlpha));
MOV(8, MatR(tempReg2), Imm8(0));
}
#endif
SetJumpTarget(skip);
} else {
// Force alpha to full if we're not checking it.