mirror of
https://github.com/hrydgard/ppsspp.git
synced 2024-11-26 23:10:38 +00:00
Merge pull request #17566 from hrydgard/vertex-decoder-color-opt
x86-64 vertex decoder: Keep the vertex-full-alpha variable in a register
This commit is contained in:
commit
17a723e68c
@ -128,7 +128,7 @@ public:
|
||||
|
||||
void GetSize(int level, int *w, int *h) const {
|
||||
_dbg_assert_(State() == ReplacementState::ACTIVE);
|
||||
_dbg_assert_(level < levels_.size());
|
||||
_dbg_assert_((size_t)level < levels_.size());
|
||||
*w = levels_[level].fullW;
|
||||
*h = levels_[level].fullH;
|
||||
}
|
||||
|
@ -60,6 +60,7 @@ static const X64Reg tempReg3 = R10;
|
||||
static const X64Reg srcReg = RCX;
|
||||
static const X64Reg dstReg = RDX;
|
||||
static const X64Reg counterReg = R8;
|
||||
static const X64Reg alphaReg = R11;
|
||||
#else
|
||||
static const X64Reg tempReg1 = RAX;
|
||||
static const X64Reg tempReg2 = R9;
|
||||
@ -67,6 +68,7 @@ static const X64Reg tempReg3 = R10;
|
||||
static const X64Reg srcReg = RDI;
|
||||
static const X64Reg dstReg = RSI;
|
||||
static const X64Reg counterReg = RDX;
|
||||
static const X64Reg alphaReg = R11;
|
||||
#endif
|
||||
#else
|
||||
static const X64Reg tempReg1 = EAX;
|
||||
@ -201,6 +203,13 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
|
||||
MOVUPS(MDisp(ESP, 80), XMM9);
|
||||
#endif
|
||||
|
||||
// Initialize alpha reg.
|
||||
#if PPSSPP_ARCH(AMD64)
|
||||
if (dec.col) {
|
||||
MOV(32, R(alphaReg), Imm32(1));
|
||||
}
|
||||
#endif
|
||||
|
||||
bool prescaleStep = false;
|
||||
// Look for prescaled texcoord steps
|
||||
for (int i = 0; i < dec.numSteps_; i++) {
|
||||
@ -243,6 +252,7 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
|
||||
}
|
||||
|
||||
// Keep the scale/offset in a few fp registers if we need it.
|
||||
// TODO: Read it from an argument pointer instead of gstate_c.uv.
|
||||
if (prescaleStep) {
|
||||
MOV(PTRBITS, R(tempReg1), ImmPtr(&gstate_c.uv));
|
||||
MOVUPS(fpScaleOffsetReg, MatR(tempReg1));
|
||||
@ -271,6 +281,21 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
|
||||
SUB(32, R(counterReg), Imm8(1));
|
||||
J_CC(CC_NZ, loopStart, true);
|
||||
|
||||
// Writeback alpha reg.
// On AMD64 the color steps clear or mask alphaReg instead of writing
// gstate_c.vertexFullAlpha themselves, so the flag is stored once here, after the vertex loop.
// If alphaReg is still 1 (its initial value), the store of 0 is skipped.
|
||||
#if PPSSPP_ARCH(AMD64)
|
||||
if (dec.col) {
|
||||
CMP(32, R(alphaReg), Imm32(1));
|
||||
FixupBranch alphaJump = J_CC(CC_E, false);
|
||||
if (RipAccessible(&gstate_c.vertexFullAlpha)) {
|
||||
MOV(8, M(&gstate_c.vertexFullAlpha), Imm8(0)); // rip accessible
|
||||
} else {
|
||||
MOV(PTRBITS, R(tempReg1), ImmPtr(&gstate_c.vertexFullAlpha));
|
||||
MOV(8, MatR(tempReg1), Imm8(0)); // not rip accessible, store through the pointer in tempReg1
|
||||
}
|
||||
SetJumpTarget(alphaJump);
|
||||
}
|
||||
#endif
|
||||
|
||||
MOVUPS(XMM4, MDisp(ESP, 0));
|
||||
MOVUPS(XMM5, MDisp(ESP, 16));
|
||||
MOVUPS(XMM6, MDisp(ESP, 32));
|
||||
@ -930,12 +955,17 @@ void VertexDecoderJitCache::Jit_Color8888() {
|
||||
|
||||
CMP(32, R(tempReg1), Imm32(0xFF000000));
|
||||
FixupBranch skip = J_CC(CC_AE, false);
|
||||
#if PPSSPP_ARCH(AMD64)
|
||||
// Would like to use CMOV or SetCC but CMOV doesn't take immediates and SetCC isn't right. So...
|
||||
XOR(32, R(alphaReg), R(alphaReg));
|
||||
#else
|
||||
if (RipAccessible(&gstate_c.vertexFullAlpha)) {
|
||||
MOV(8, M(&gstate_c.vertexFullAlpha), Imm8(0)); // rip accessible
|
||||
} else {
|
||||
MOV(PTRBITS, R(tempReg1), ImmPtr(&gstate_c.vertexFullAlpha));
|
||||
MOV(8, MatR(tempReg1), Imm8(0));
|
||||
}
|
||||
#endif
|
||||
SetJumpTarget(skip);
|
||||
}
|
||||
|
||||
@ -965,12 +995,16 @@ void VertexDecoderJitCache::Jit_Color4444() {
|
||||
|
||||
CMP(32, R(tempReg1), Imm32(0xFF000000));
|
||||
FixupBranch skip = J_CC(CC_AE, false);
|
||||
#if PPSSPP_ARCH(AMD64)
|
||||
XOR(32, R(alphaReg), R(alphaReg));
|
||||
#else
|
||||
if (RipAccessible(&gstate_c.vertexFullAlpha)) {
|
||||
MOV(8, M(&gstate_c.vertexFullAlpha), Imm8(0)); // rip accessible
|
||||
} else {
|
||||
MOV(PTRBITS, R(tempReg1), ImmPtr(&gstate_c.vertexFullAlpha));
|
||||
MOV(8, MatR(tempReg1), Imm8(0));
|
||||
}
|
||||
#endif
|
||||
SetJumpTarget(skip);
|
||||
}
|
||||
|
||||
@ -1042,14 +1076,18 @@ void VertexDecoderJitCache::Jit_Color5551() {
|
||||
|
||||
MOV(32, MDisp(dstReg, dec_->decFmt.c0off), R(tempReg2));
|
||||
|
||||
// Let's AND to avoid a branch, tempReg1 has alpha only in the top 8 bits.
|
||||
// Let's AND to avoid a branch, tempReg1 has alpha only in the top 8 bits, and they're all equal.
|
||||
SHR(32, R(tempReg1), Imm8(24));
|
||||
#if PPSSPP_ARCH(AMD64)
|
||||
AND(8, R(alphaReg), R(tempReg1));
|
||||
#else
|
||||
if (RipAccessible(&gstate_c.vertexFullAlpha)) {
|
||||
AND(8, M(&gstate_c.vertexFullAlpha), R(tempReg1)); // rip accessible
|
||||
} else {
|
||||
MOV(PTRBITS, R(tempReg3), ImmPtr(&gstate_c.vertexFullAlpha));
|
||||
AND(8, MatR(tempReg3), R(tempReg1));
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void VertexDecoderJitCache::Jit_Color8888Morph() {
|
||||
@ -1258,12 +1296,16 @@ void VertexDecoderJitCache::Jit_WriteMorphColor(int outOff, bool checkAlpha) {
|
||||
if (checkAlpha) {
|
||||
CMP(32, R(tempReg1), Imm32(0xFF000000));
|
||||
FixupBranch skip = J_CC(CC_AE, false);
|
||||
#if PPSSPP_ARCH(AMD64)
|
||||
XOR(32, R(alphaReg), R(alphaReg));
|
||||
#else
|
||||
if (RipAccessible(&gstate_c.vertexFullAlpha)) {
|
||||
MOV(8, M(&gstate_c.vertexFullAlpha), Imm8(0)); // rip accessible
|
||||
} else {
|
||||
MOV(PTRBITS, R(tempReg2), ImmPtr(&gstate_c.vertexFullAlpha));
|
||||
MOV(8, MatR(tempReg2), Imm8(0));
|
||||
}
|
||||
#endif
|
||||
SetJumpTarget(skip);
|
||||
} else {
|
||||
// Force alpha to full if we're not checking it.
|
||||
|
Loading…
Reference in New Issue
Block a user