mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-02-26 00:45:49 +00:00
Initial stab at tracking vertex alpha.
Not sure which method would be efficient on x86...
This commit is contained in:
parent
893a719c4e
commit
61f5d3d360
@ -219,6 +219,7 @@ void VertexDecoder::Step_Color565() const
|
||||
c[1] = Convert6To8((cdata>>5) & 0x3f);
|
||||
c[2] = Convert5To8((cdata>>11) & 0x1f);
|
||||
c[3] = 255;
|
||||
// Always full alpha.
|
||||
}
|
||||
|
||||
void VertexDecoder::Step_Color5551() const
|
||||
@ -229,6 +230,7 @@ void VertexDecoder::Step_Color5551() const
|
||||
c[1] = Convert5To8((cdata>>5) & 0x1f);
|
||||
c[2] = Convert5To8((cdata>>10) & 0x1f);
|
||||
c[3] = (cdata >> 15) ? 255 : 0;
|
||||
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && c[3] != 0;
|
||||
}
|
||||
|
||||
void VertexDecoder::Step_Color4444() const
|
||||
@ -237,6 +239,7 @@ void VertexDecoder::Step_Color4444() const
|
||||
u16 cdata = *(u16*)(ptr_ + coloff);
|
||||
for (int j = 0; j < 4; j++)
|
||||
c[j] = Convert4To8((cdata >> (j * 4)) & 0xF);
|
||||
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && c[3] == 255;
|
||||
}
|
||||
|
||||
void VertexDecoder::Step_Color8888() const
|
||||
@ -244,6 +247,7 @@ void VertexDecoder::Step_Color8888() const
|
||||
u8 *c = decoded_ + decFmt.c0off;
|
||||
const u8 *cdata = (const u8*)(ptr_ + coloff);
|
||||
memcpy(c, cdata, sizeof(u8) * 4);
|
||||
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && c[3] == 255;
|
||||
}
|
||||
|
||||
void VertexDecoder::Step_Color565Morph() const
|
||||
@ -262,6 +266,7 @@ void VertexDecoder::Step_Color565Morph() const
|
||||
c[i] = (u8)col[i];
|
||||
}
|
||||
c[3] = 255;
|
||||
// Always full alpha.
|
||||
}
|
||||
|
||||
void VertexDecoder::Step_Color5551Morph() const
|
||||
@ -280,6 +285,7 @@ void VertexDecoder::Step_Color5551Morph() const
|
||||
for (int i = 0; i < 4; i++) {
|
||||
c[i] = (u8)col[i];
|
||||
}
|
||||
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && c[3] == 255;
|
||||
}
|
||||
|
||||
void VertexDecoder::Step_Color4444Morph() const
|
||||
@ -296,6 +302,7 @@ void VertexDecoder::Step_Color4444Morph() const
|
||||
for (int i = 0; i < 4; i++) {
|
||||
c[i] = (u8)col[i];
|
||||
}
|
||||
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && c[3] == 255;
|
||||
}
|
||||
|
||||
void VertexDecoder::Step_Color8888Morph() const
|
||||
@ -312,6 +319,7 @@ void VertexDecoder::Step_Color8888Morph() const
|
||||
for (int i = 0; i < 4; i++) {
|
||||
c[i] = (u8)(col[i]);
|
||||
}
|
||||
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && c[3] == 255;
|
||||
}
|
||||
|
||||
void VertexDecoder::Step_NormalS8() const
|
||||
@ -841,6 +849,7 @@ void VertexDecoder::DecodeVerts(u8 *decodedptr, const void *verts, int indexLowe
|
||||
jitted_(ptr_, decoded_, count);
|
||||
} else {
|
||||
// Interpret the decode steps
|
||||
// TODO: Init gstate_c.vertexFullAlpha here? Or in Setup? When is it reset?
|
||||
for (; count; count--) {
|
||||
for (int i = 0; i < numSteps_; i++) {
|
||||
((*this).*steps_[i])();
|
||||
|
@ -266,6 +266,6 @@ private:
|
||||
bool CompileStep(const VertexDecoder &dec, int i);
|
||||
void Jit_ApplyWeights();
|
||||
void Jit_WriteMatrixMul(int outOff, bool pos);
|
||||
void Jit_WriteMorphColor(int outOff);
|
||||
void Jit_WriteMorphColor(int outOff, bool checkAlpha = true);
|
||||
const VertexDecoder *dec_;
|
||||
};
|
||||
|
@ -61,7 +61,8 @@ static const ARMReg tempReg2 = R4;
|
||||
static const ARMReg tempReg3 = R5;
|
||||
static const ARMReg scratchReg = R6;
|
||||
static const ARMReg scratchReg2 = R7;
|
||||
static const ARMReg scratchReg3 = R12;
|
||||
static const ARMReg scratchReg3 = R8;
|
||||
static const ARMReg hasAlphaReg = R12;
|
||||
static const ARMReg srcReg = R0;
|
||||
static const ARMReg dstReg = R1;
|
||||
static const ARMReg counterReg = R2;
|
||||
@ -262,6 +263,10 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec) {
|
||||
// TODO: Preload scale factors
|
||||
}
|
||||
|
||||
if (dec.col) {
|
||||
MOV(hasAlphaReg, 0);
|
||||
}
|
||||
|
||||
JumpTarget loopStart = GetCodePtr();
|
||||
// Preload data cache ahead of reading. This offset seems pretty good.
|
||||
PLD(srcReg, 64);
|
||||
@ -281,6 +286,11 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec) {
|
||||
SUBS(counterReg, counterReg, 1);
|
||||
B_CC(CC_NEQ, loopStart);
|
||||
|
||||
// TODO: Do something with hasAlphaReg.
|
||||
if (dec.col) {
|
||||
|
||||
}
|
||||
|
||||
if (NEONSkinning || NEONMorphing) {
|
||||
VPOP(D8, 8);
|
||||
}
|
||||
@ -664,7 +674,12 @@ void VertexDecoderJitCache::Jit_TcFloatPrescale() {
|
||||
|
||||
// ARM JIT step for 8888 vertex colors.
// Loads the 32-bit color word at srcReg + coloff into tempReg1 and stores it
// unchanged at dstReg + decFmt.c0off. In between, MVNS on (tempReg1 ASR 24)
// sets the condition flags from the alpha byte; the ORR that follows
// SetCC(CC_NEQ) then sets bit 0 of hasAlphaReg when alpha != 0xFF.
// NOTE(review): assumes SetCC() predicates the instructions emitted after it
// until it is reset with CC_AL - confirm against the ARM emitter.
void VertexDecoderJitCache::Jit_Color8888() {
|
||||
LDR(tempReg1, srcReg, dec_->coloff);
|
||||
// Set flags to determine if alpha != 0xFF.
|
||||
MVNS(tempReg2, Operand2(tempReg1, ST_ASR, 24));
|
||||
STR(tempReg1, dstReg, dec_->decFmt.c0off);
|
||||
SetCC(CC_NEQ);
|
||||
ORR(hasAlphaReg, hasAlphaReg, IMM(1));
|
||||
SetCC(CC_AL);
|
||||
}
|
||||
|
||||
void VertexDecoderJitCache::Jit_Color4444() {
|
||||
@ -679,10 +694,16 @@ void VertexDecoderJitCache::Jit_Color4444() {
|
||||
ANDI2R(tempReg3, tempReg1, 0xF000, scratchReg);
|
||||
ORR(tempReg2, tempReg2, Operand2(tempReg3, ST_LSL, 12));
|
||||
|
||||
// And saturate.
|
||||
// And expand to 8 bits.
|
||||
ORR(tempReg1, tempReg2, Operand2(tempReg2, ST_LSL, 4));
|
||||
|
||||
STR(tempReg1, dstReg, dec_->decFmt.c0off);
|
||||
|
||||
// Set flags to determine if alpha != 0xFF.
|
||||
MVNS(tempReg2, Operand2(tempReg1, ST_ASR, 24));
|
||||
SetCC(CC_NEQ);
|
||||
ORR(hasAlphaReg, hasAlphaReg, IMM(1));
|
||||
SetCC(CC_AL);
|
||||
}
|
||||
|
||||
void VertexDecoderJitCache::Jit_Color565() {
|
||||
@ -706,7 +727,7 @@ void VertexDecoderJitCache::Jit_Color565() {
|
||||
ORR(tempReg3, tempReg3, Operand2(tempReg1, ST_LSR, 4));
|
||||
ORR(tempReg2, tempReg2, Operand2(tempReg3, ST_LSL, 8));
|
||||
|
||||
// Add in full alpha.
|
||||
// Add in full alpha. No need to update hasAlphaReg.
|
||||
ORI2R(tempReg1, tempReg2, 0xFF000000, scratchReg);
|
||||
|
||||
STR(tempReg1, dstReg, dec_->decFmt.c0off);
|
||||
@ -731,8 +752,13 @@ void VertexDecoderJitCache::Jit_Color5551() {
|
||||
// Now we just need alpha. Since we loaded as signed, it'll be extended.
|
||||
ANDI2R(tempReg1, tempReg1, 0xFF000000, scratchReg);
|
||||
ORR(tempReg2, tempReg2, tempReg1);
|
||||
|
||||
|
||||
// Set flags to determine if alpha != 0xFF.
|
||||
MVNS(tempReg3, Operand2(tempReg1, ST_ASR, 24));
|
||||
STR(tempReg2, dstReg, dec_->decFmt.c0off);
|
||||
SetCC(CC_NEQ);
|
||||
ORR(hasAlphaReg, hasAlphaReg, IMM(1));
|
||||
SetCC(CC_AL);
|
||||
}
|
||||
|
||||
void VertexDecoderJitCache::Jit_Color8888Morph() {
|
||||
@ -957,7 +983,7 @@ void VertexDecoderJitCache::Jit_Color565Morph() {
|
||||
} else {
|
||||
VMOV(S11, tempReg3);
|
||||
}
|
||||
Jit_WriteMorphColor(dec_->decFmt.c0off);
|
||||
Jit_WriteMorphColor(dec_->decFmt.c0off, false);
|
||||
}
|
||||
|
||||
// First is the left shift, second is the right shift (against walls, to get the RGBA values.)
|
||||
@ -1045,13 +1071,16 @@ void VertexDecoderJitCache::Jit_Color5551Morph() {
|
||||
}
|
||||
|
||||
// Expects RGBA color in S8 - S11, which is Q2.
|
||||
void VertexDecoderJitCache::Jit_WriteMorphColor(int outOff) {
|
||||
void VertexDecoderJitCache::Jit_WriteMorphColor(int outOff, bool checkAlpha) {
|
||||
if (NEONMorphing) {
|
||||
ADDI2R(tempReg1, dstReg, outOff, scratchReg);
|
||||
VCVT(I_32 | I_UNSIGNED, neonScratchRegQ, neonScratchRegQ);
|
||||
VQMOVN(I_32 | I_UNSIGNED, neonScratchReg, neonScratchRegQ);
|
||||
VQMOVN(I_16 | I_UNSIGNED, neonScratchReg, neonScratchRegQ);
|
||||
VST1_lane(I_32, neonScratchReg, tempReg1, 0, true);
|
||||
if (checkAlpha) {
|
||||
VMOV_neon(I_32, scratchReg, neonScratchReg, 0);
|
||||
}
|
||||
} else {
|
||||
VCVT(S8, S8, TO_INT);
|
||||
VCVT(S9, S9, TO_INT);
|
||||
@ -1066,6 +1095,14 @@ void VertexDecoderJitCache::Jit_WriteMorphColor(int outOff) {
|
||||
ORR(scratchReg, scratchReg, Operand2(tempReg3, ST_LSL, 24));
|
||||
STR(scratchReg, dstReg, outOff);
|
||||
}
|
||||
|
||||
// Set flags to determine if alpha != 0xFF.
|
||||
if (checkAlpha) {
|
||||
MVNS(tempReg2, Operand2(scratchReg, ST_ASR, 24));
|
||||
SetCC(CC_NEQ);
|
||||
ORR(hasAlphaReg, hasAlphaReg, IMM(1));
|
||||
SetCC(CC_AL);
|
||||
}
|
||||
}
|
||||
|
||||
void VertexDecoderJitCache::Jit_NormalS8() {
|
||||
|
@ -54,6 +54,7 @@ static const X64Reg tempReg3 = R10;
|
||||
static const X64Reg srcReg = RCX;
|
||||
static const X64Reg dstReg = RDX;
|
||||
static const X64Reg counterReg = R8;
|
||||
static const OpArg hasAlphaArg = R(R14);
|
||||
#else
|
||||
static const X64Reg tempReg1 = RAX;
|
||||
static const X64Reg tempReg2 = R9;
|
||||
@ -61,6 +62,7 @@ static const X64Reg tempReg3 = R10;
|
||||
static const X64Reg srcReg = RDI;
|
||||
static const X64Reg dstReg = RSI;
|
||||
static const X64Reg counterReg = RDX;
|
||||
static const OpArg hasAlphaArg = R(R14);
|
||||
#endif
|
||||
#else
|
||||
static const X64Reg tempReg1 = EAX;
|
||||
@ -69,6 +71,8 @@ static const X64Reg tempReg3 = EDX;
|
||||
static const X64Reg srcReg = ESI;
|
||||
static const X64Reg dstReg = EDI;
|
||||
static const X64Reg counterReg = ECX;
|
||||
static u32 hasAlphaValue;
|
||||
static const OpArg hasAlphaArg = M(&hasAlphaValue);
|
||||
#endif
|
||||
|
||||
// XMM0-XMM5 are volatile on Windows X64
|
||||
@ -234,6 +238,10 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec) {
|
||||
UNPCKLPD(fpScaleOffsetReg, R(fpScratchReg));
|
||||
}
|
||||
|
||||
if (dec.col) {
|
||||
MOV(32, hasAlphaArg, Imm32(0));
|
||||
}
|
||||
|
||||
// Let's not bother with a proper stack frame. We just grab the arguments and go.
|
||||
JumpTarget loopStart = GetCodePtr();
|
||||
for (int i = 0; i < dec.numSteps_; i++) {
|
||||
@ -249,6 +257,11 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec) {
|
||||
SUB(32, R(counterReg), Imm8(1));
|
||||
J_CC(CC_NZ, loopStart, true);
|
||||
|
||||
// TODO: Do something with hasAlphaArg from EAX.
|
||||
if (dec.col) {
|
||||
//MOV(32, R(EAX), hasAlphaArg);
|
||||
}
|
||||
|
||||
MOVUPS(XMM4, MDisp(ESP, 0));
|
||||
MOVUPS(XMM5, MDisp(ESP, 16));
|
||||
MOVUPS(XMM6, MDisp(ESP, 32));
|
||||
@ -556,6 +569,7 @@ void VertexDecoderJitCache::Jit_TcFloatThrough() {
|
||||
// x86 JIT step for 8888 vertex colors.
// Copies the 32-bit color word at srcReg + coloff to dstReg + decFmt.c0off
// via tempReg1, without modifying it. Alpha tracking through hasAlphaArg is
// not implemented here yet (see the TODO below).
void VertexDecoderJitCache::Jit_Color8888() {
|
||||
MOV(32, R(tempReg1), MDisp(srcReg, dec_->coloff));
|
||||
MOV(32, MDisp(dstReg, dec_->decFmt.c0off), R(tempReg1));
|
||||
// TODO: hasAlphaArg.
|
||||
}
|
||||
|
||||
static const u32 MEMORY_ALIGNED16(nibbles[4]) = { 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, };
|
||||
@ -625,6 +639,7 @@ void VertexDecoderJitCache::Jit_Color4444() {
|
||||
OR(32, R(tempReg2), R(tempReg3));
|
||||
|
||||
MOV(32, MDisp(dstReg, dec_->decFmt.c0off), R(tempReg2));
|
||||
// TODO: hasAlphaArg.
|
||||
}
|
||||
|
||||
void VertexDecoderJitCache::Jit_Color565() {
|
||||
@ -661,6 +676,7 @@ void VertexDecoderJitCache::Jit_Color565() {
|
||||
OR(32, R(tempReg2), R(tempReg1));
|
||||
|
||||
MOV(32, MDisp(dstReg, dec_->decFmt.c0off), R(tempReg2));
|
||||
// Never has alpha, no need to update hasAlphaArg.
|
||||
}
|
||||
|
||||
void VertexDecoderJitCache::Jit_Color5551() {
|
||||
@ -696,6 +712,7 @@ void VertexDecoderJitCache::Jit_Color5551() {
|
||||
OR(32, R(tempReg2), R(tempReg1));
|
||||
|
||||
MOV(32, MDisp(dstReg, dec_->decFmt.c0off), R(tempReg2));
|
||||
// TODO: hasAlphaArg.
|
||||
}
|
||||
|
||||
void VertexDecoderJitCache::Jit_Color8888Morph() {
|
||||
@ -825,7 +842,7 @@ void VertexDecoderJitCache::Jit_Color565Morph() {
|
||||
}
|
||||
}
|
||||
|
||||
Jit_WriteMorphColor(dec_->decFmt.c0off);
|
||||
Jit_WriteMorphColor(dec_->decFmt.c0off, false);
|
||||
}
|
||||
|
||||
// Intentionally in reverse order.
|
||||
@ -884,12 +901,15 @@ void VertexDecoderJitCache::Jit_Color5551Morph() {
|
||||
Jit_WriteMorphColor(dec_->decFmt.c0off);
|
||||
}
|
||||
|
||||
void VertexDecoderJitCache::Jit_WriteMorphColor(int outOff) {
|
||||
// x86 JIT helper that writes a morphed color to the decoded vertex.
// Converts the float values in fpScratchReg to integers, packs them down to
// bytes (PACKSSDW, then PACKUSWB) and stores the resulting 32-bit value at
// dstReg + outOff.
//   outOff     - byte offset from dstReg at which the packed color is stored.
//   checkAlpha - intended to request an update of hasAlphaArg; currently it
//                has no effect because the branch below contains only a TODO.
void VertexDecoderJitCache::Jit_WriteMorphColor(int outOff, bool checkAlpha) {
|
||||
// Pack back into a u32.
|
||||
CVTPS2DQ(fpScratchReg, R(fpScratchReg));
|
||||
PACKSSDW(fpScratchReg, R(fpScratchReg));
|
||||
PACKUSWB(fpScratchReg, R(fpScratchReg));
|
||||
MOVD_xmm(MDisp(dstReg, outOff), fpScratchReg);
|
||||
if (checkAlpha) {
|
||||
// TODO: hasAlphaArg.
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// Copy 3 bytes and then a zero. Might as well copy four.
|
||||
|
Loading…
x
Reference in New Issue
Block a user