mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-01-30 18:14:24 +00:00
ARM64: Some minor vertex decoder work. Hm, I think SCVTF will actually divide by 128.0, not 127.0 :/
This commit is contained in:
parent
1a02e32ad1
commit
5496b3d3b1
@ -53,7 +53,7 @@ static const ARM64Reg fpUVoffsetReg = D1;
|
||||
static const ARM64Reg neonScratchReg = D2;
|
||||
static const ARM64Reg neonScratchReg2 = D3;
|
||||
|
||||
static const ARM64Reg neonScratchRegQ = Q1; // Overlaps with all the scratch regs
|
||||
static const ARM64Reg neonScratchRegQ = Q1;
|
||||
|
||||
// Everything above S6 is fair game for skinning
|
||||
|
||||
@ -85,15 +85,18 @@ static const JitLookup jitLookup[] = {
|
||||
{&VertexDecoder::Step_TcU8Prescale, &VertexDecoderJitCache::Jit_TcU8Prescale},
|
||||
{&VertexDecoder::Step_TcU16Prescale, &VertexDecoderJitCache::Jit_TcU16Prescale},
|
||||
{&VertexDecoder::Step_TcFloatPrescale, &VertexDecoderJitCache::Jit_TcFloatPrescale},
|
||||
|
||||
*/
|
||||
{&VertexDecoder::Step_TcU16Through, &VertexDecoderJitCache::Jit_TcU16Through},
|
||||
/*
|
||||
{&VertexDecoder::Step_TcFloatThrough, &VertexDecoderJitCache::Jit_TcFloatThrough},
|
||||
{&VertexDecoder::Step_TcU16ThroughDouble, &VertexDecoderJitCache::Jit_TcU16ThroughDouble},
|
||||
|
||||
*/
|
||||
{&VertexDecoder::Step_NormalS8, &VertexDecoderJitCache::Jit_NormalS8},
|
||||
{&VertexDecoder::Step_NormalS16, &VertexDecoderJitCache::Jit_NormalS16},
|
||||
{&VertexDecoder::Step_NormalFloat, &VertexDecoderJitCache::Jit_NormalFloat},
|
||||
|
||||
/*
|
||||
{&VertexDecoder::Step_NormalS8Skin, &VertexDecoderJitCache::Jit_NormalS8Skin},
|
||||
{&VertexDecoder::Step_NormalS16Skin, &VertexDecoderJitCache::Jit_NormalS16Skin},
|
||||
{&VertexDecoder::Step_NormalFloatSkin, &VertexDecoderJitCache::Jit_NormalFloatSkin},
|
||||
@ -105,9 +108,13 @@ static const JitLookup jitLookup[] = {
|
||||
{&VertexDecoder::Step_Color5551, &VertexDecoderJitCache::Jit_Color5551},
|
||||
|
||||
{&VertexDecoder::Step_PosS8Through, &VertexDecoderJitCache::Jit_PosS8Through},
|
||||
*/
|
||||
{&VertexDecoder::Step_PosS16Through, &VertexDecoderJitCache::Jit_PosS16Through},
|
||||
/*
|
||||
{&VertexDecoder::Step_PosFloatThrough, &VertexDecoderJitCache::Jit_PosFloat},
|
||||
*/
|
||||
{&VertexDecoder::Step_PosS8, &VertexDecoderJitCache::Jit_PosS8},
|
||||
{&VertexDecoder::Step_PosS16, &VertexDecoderJitCache::Jit_PosS16},
|
||||
{&VertexDecoder::Step_PosFloat, &VertexDecoderJitCache::Jit_PosFloat},
|
||||
/*
|
||||
{&VertexDecoder::Step_PosS8Skin, &VertexDecoderJitCache::Jit_PosS8Skin},
|
||||
@ -240,6 +247,13 @@ void VertexDecoderJitCache::Jit_TcU16() {
|
||||
STR(INDEX_UNSIGNED, tempReg1, dstReg, dec_->decFmt.uvoff);
|
||||
}
|
||||
|
||||
// Emits code for through-mode 16-bit texcoords: the two source halfwords are
// packed into a single word and written to the decoded UV slot unconverted.
void VertexDecoderJitCache::Jit_TcU16Through() {
	const auto uSrcOff = dec_->tcoff;
	const auto uvDstOff = dec_->decFmt.uvoff;

	// Fetch U and V as separate halfwords.
	LDRH(INDEX_UNSIGNED, tempReg1, srcReg, uSrcOff);
	LDRH(INDEX_UNSIGNED, tempReg2, srcReg, uSrcOff + 2);

	// tempReg1 = U | (V << 16), then store both components with one write.
	ORR(tempReg1, tempReg1, tempReg2, ArithOption(tempReg2, ST_LSL, 16));
	STR(INDEX_UNSIGNED, tempReg1, dstReg, uvDstOff);
}
|
||||
|
||||
void VertexDecoderJitCache::Jit_TcFloat() {
|
||||
LDR(INDEX_UNSIGNED, tempReg1, srcReg, dec_->tcoff);
|
||||
LDR(INDEX_UNSIGNED, tempReg2, srcReg, dec_->tcoff + 4);
|
||||
@ -247,6 +261,20 @@ void VertexDecoderJitCache::Jit_TcFloat() {
|
||||
STR(INDEX_UNSIGNED, tempReg2, dstReg, dec_->decFmt.uvoff + 4);
|
||||
}
|
||||
|
||||
void VertexDecoderJitCache::Jit_PosS8() {
|
||||
Jit_AnyS8ToFloat(dec_->posoff);
|
||||
STR(INDEX_UNSIGNED, src[0], dstReg, dec_->decFmt.posoff);
|
||||
STR(INDEX_UNSIGNED, src[1], dstReg, dec_->decFmt.posoff + 4);
|
||||
STR(INDEX_UNSIGNED, src[2], dstReg, dec_->decFmt.posoff + 8);
|
||||
}
|
||||
|
||||
void VertexDecoderJitCache::Jit_PosS16() {
|
||||
Jit_AnyS16ToFloat(dec_->posoff);
|
||||
STR(INDEX_UNSIGNED, src[0], dstReg, dec_->decFmt.posoff);
|
||||
STR(INDEX_UNSIGNED, src[1], dstReg, dec_->decFmt.posoff + 4);
|
||||
STR(INDEX_UNSIGNED, src[2], dstReg, dec_->decFmt.posoff + 8);
|
||||
}
|
||||
|
||||
// Just copy 12 bytes.
|
||||
void VertexDecoderJitCache::Jit_PosFloat() {
|
||||
LDR(INDEX_UNSIGNED, tempReg1, srcReg, dec_->posoff);
|
||||
@ -256,3 +284,62 @@ void VertexDecoderJitCache::Jit_PosFloat() {
|
||||
STR(INDEX_UNSIGNED, tempReg2, dstReg, dec_->decFmt.posoff + 4);
|
||||
STR(INDEX_UNSIGNED, tempReg3, dstReg, dec_->decFmt.posoff + 8);
|
||||
}
|
||||
|
||||
// Emits code for through-mode 16-bit positions: each component is converted
// to float with no scaling and written to the decoded position slot.
void VertexDecoderJitCache::Jit_PosS16Through() {
	// X and Y are loaded sign-extended; Z is loaded with the unsigned halfword load.
	LDRSH(INDEX_UNSIGNED, tempReg1, srcReg, dec_->posoff);
	LDRSH(INDEX_UNSIGNED, tempReg2, srcReg, dec_->posoff + 2);
	LDRH(INDEX_UNSIGNED, tempReg3, srcReg, dec_->posoff + 4);

	// Integer -> float, no fractional bits (values pass through unscaled).
	fp.SCVTF(fpScratchReg, tempReg1);
	fp.SCVTF(fpScratchReg2, tempReg2);
	fp.SCVTF(fpScratchReg3, tempReg3);

	// Store through the float emitter: the integer STR would encode a
	// general-purpose register instead of the FP scratch register.
	// NOTE(review): assumes ARM64FloatEmitter::STR(size, type, Rt, Rn, imm) is available — confirm.
	fp.STR(32, INDEX_UNSIGNED, fpScratchReg, dstReg, dec_->decFmt.posoff);
	fp.STR(32, INDEX_UNSIGNED, fpScratchReg2, dstReg, dec_->decFmt.posoff + 4);
	fp.STR(32, INDEX_UNSIGNED, fpScratchReg3, dstReg, dec_->decFmt.posoff + 8);
}
|
||||
|
||||
// Emits code that copies an 8-bit x/y/z normal: the three source bytes are
// packed into one word (x in bits 0-7, y in 8-15, z in 16-23) and stored.
void VertexDecoderJitCache::Jit_NormalS8() {
	const auto xSrcOff = dec_->nrmoff;
	const auto nrmDstOff = dec_->decFmt.nrmoff;

	// One byte load per component.
	LDRB(INDEX_UNSIGNED, tempReg1, srcReg, xSrcOff);
	LDRB(INDEX_UNSIGNED, tempReg2, srcReg, xSrcOff + 1);
	LDRB(INDEX_UNSIGNED, tempReg3, srcReg, xSrcOff + 2);

	// Merge y and z into the word that already holds x.
	ORR(tempReg1, tempReg1, tempReg2, ArithOption(tempReg2, ST_LSL, 8));
	ORR(tempReg1, tempReg1, tempReg3, ArithOption(tempReg3, ST_LSL, 16));

	STR(INDEX_UNSIGNED, tempReg1, dstReg, nrmDstOff);
}
|
||||
|
||||
// Copy 6 bytes and then 2 zeroes.
|
||||
void VertexDecoderJitCache::Jit_NormalS16() {
|
||||
LDRH(INDEX_UNSIGNED, tempReg1, srcReg, dec_->nrmoff);
|
||||
LDRH(INDEX_UNSIGNED, tempReg2, srcReg, dec_->nrmoff + 2);
|
||||
LDRH(INDEX_UNSIGNED, tempReg3, srcReg, dec_->nrmoff + 4);
|
||||
ORR(tempReg1, tempReg1, tempReg2, ArithOption(tempReg2, ST_LSL, 16));
|
||||
STR(INDEX_UNSIGNED, tempReg1, dstReg, dec_->decFmt.nrmoff);
|
||||
STR(INDEX_UNSIGNED, tempReg3, dstReg, dec_->decFmt.nrmoff + 4);
|
||||
}
|
||||
|
||||
// Emits code that copies a float x/y/z normal (three words) from the source
// vertex to the decoded vertex without modifying the values.
void VertexDecoderJitCache::Jit_NormalFloat() {
	const auto srcBase = dec_->nrmoff;
	const auto dstBase = dec_->decFmt.nrmoff;

	// Load all three components first...
	LDR(INDEX_UNSIGNED, tempReg1, srcReg, srcBase);
	LDR(INDEX_UNSIGNED, tempReg2, srcReg, srcBase + 4);
	LDR(INDEX_UNSIGNED, tempReg3, srcReg, srcBase + 8);

	// ...then write them out in the same order.
	STR(INDEX_UNSIGNED, tempReg1, dstReg, dstBase);
	STR(INDEX_UNSIGNED, tempReg2, dstReg, dstBase + 4);
	STR(INDEX_UNSIGNED, tempReg3, dstReg, dstBase + 8);
}
|
||||
|
||||
// Emits code that loads three consecutive signed bytes starting at srcoff and
// converts each to float, leaving the results in src[0], src[1] and src[2].
// The conversion treats the input as fixed-point with 7 fractional bits,
// i.e. it scales by 1/128.
void VertexDecoderJitCache::Jit_AnyS8ToFloat(int srcoff) {
	// TODO: NEONize. In that case we'll leave all three floats in one register instead, so callers must change too.
	const int fractionalBits = 7;

	// Sign-extending byte loads, one per component.
	LDRSB(INDEX_UNSIGNED, tempReg1, srcReg, srcoff);
	LDRSB(INDEX_UNSIGNED, tempReg2, srcReg, srcoff + 1);
	LDRSB(INDEX_UNSIGNED, tempReg3, srcReg, srcoff + 2);

	fp.SCVTF(src[0], tempReg1, fractionalBits);
	fp.SCVTF(src[1], tempReg2, fractionalBits);
	fp.SCVTF(src[2], tempReg3, fractionalBits);
}
|
||||
|
||||
// Emits code that loads three consecutive signed halfwords starting at srcoff
// and converts each to float, leaving the results in src[0], src[1] and src[2].
// The conversion treats the input as fixed-point with 15 fractional bits,
// i.e. it scales by 1/32768.
void VertexDecoderJitCache::Jit_AnyS16ToFloat(int srcoff) {
	const int fractionalBits = 15;

	// Sign-extending halfword loads, one per component.
	LDRSH(INDEX_UNSIGNED, tempReg1, srcReg, srcoff);
	LDRSH(INDEX_UNSIGNED, tempReg2, srcReg, srcoff + 2);
	LDRSH(INDEX_UNSIGNED, tempReg3, srcReg, srcoff + 4);

	fp.SCVTF(src[0], tempReg1, fractionalBits);
	fp.SCVTF(src[1], tempReg2, fractionalBits);
	fp.SCVTF(src[2], tempReg3, fractionalBits);
}
|
||||
|
@ -134,7 +134,8 @@ void PrintDecodedVertex(VertexReader &vtx) {
|
||||
printf("P: %f %f %f\n", pos[0], pos[1], pos[2]);
|
||||
}
|
||||
|
||||
VertexDecoder::VertexDecoder() : jitted_(0), decoded_(nullptr), ptr_(nullptr) {
|
||||
VertexDecoder::VertexDecoder() : jitted_(0), decoded_(nullptr), ptr_(nullptr)
|
||||
{
|
||||
}
|
||||
|
||||
void VertexDecoder::Step_WeightsU8() const
|
||||
@ -1093,7 +1094,11 @@ int VertexDecoder::ToString(char *output) const {
|
||||
return output - start;
|
||||
}
|
||||
|
||||
VertexDecoderJitCache::VertexDecoderJitCache() {
|
||||
VertexDecoderJitCache::VertexDecoderJitCache()
|
||||
#ifdef ARM64
|
||||
: fp(this)
|
||||
#endif
|
||||
{
|
||||
// 256k should be enough.
|
||||
AllocCodeSpace(1024 * 64 * 4);
|
||||
|
||||
|
@ -683,4 +683,7 @@ private:
|
||||
void Jit_AnyFloatMorph(int srcoff, int dstoff);
|
||||
|
||||
const VertexDecoder *dec_;
|
||||
#ifdef ARM64
|
||||
Arm64Gen::ARM64FloatEmitter fp;
|
||||
#endif
|
||||
};
|
||||
|
Loading…
x
Reference in New Issue
Block a user