mirror of
https://github.com/libretro/ppsspp.git
synced 2025-02-26 03:35:27 +00:00
Vertex JIT: Add some missing functions to ARM/ARM64 decoders.
This commit is contained in:
parent
f9d2fdac4c
commit
98e0ccf1e1
@ -120,6 +120,8 @@ static const JitLookup jitLookup[] = {
|
||||
{&VertexDecoder::Step_WeightsFloatSkin, &VertexDecoderJitCache::Jit_WeightsFloatSkin},
|
||||
|
||||
{&VertexDecoder::Step_TcFloat, &VertexDecoderJitCache::Jit_TcFloat},
|
||||
{&VertexDecoder::Step_TcU8ToFloat, &VertexDecoderJitCache::Jit_TcU8ToFloat},
|
||||
{&VertexDecoder::Step_TcU16ToFloat, &VertexDecoderJitCache::Jit_TcU16ToFloat},
|
||||
{&VertexDecoder::Step_TcU16Double, &VertexDecoderJitCache::Jit_TcU16Double},
|
||||
|
||||
{&VertexDecoder::Step_TcU8Prescale, &VertexDecoderJitCache::Jit_TcU8Prescale},
|
||||
@ -646,6 +648,28 @@ void VertexDecoderJitCache::Jit_TcU8Prescale() {
|
||||
}
|
||||
}
|
||||
|
||||
void VertexDecoderJitCache::Jit_TcU8ToFloat() {
|
||||
if (cpu_info.bNEON) {
|
||||
// TODO: Needs testing
|
||||
ADD(scratchReg, srcReg, dec_->tcoff);
|
||||
VLD1_lane(I_16, neonScratchReg, scratchReg, 0, false);
|
||||
VMOVL(I_8 | I_UNSIGNED, neonScratchRegQ, neonScratchReg); // Widen to 16-bit
|
||||
VMOVL(I_16 | I_UNSIGNED, neonScratchRegQ, neonScratchReg); // Widen to 32-bit
|
||||
VCVT(F_32 | I_UNSIGNED, neonScratchRegQ, neonScratchRegQ);
|
||||
ADD(scratchReg2, dstReg, dec_->decFmt.uvoff);
|
||||
VST1(F_32, neonScratchReg, scratchReg2, 1, ALIGN_NONE);
|
||||
} else {
|
||||
LDRB(tempReg1, srcReg, dec_->tcoff);
|
||||
LDRB(tempReg2, srcReg, dec_->tcoff + 1);
|
||||
VMOV(fpScratchReg, tempReg1);
|
||||
VMOV(fpScratchReg2, tempReg2);
|
||||
VCVT(fpScratchReg, fpScratchReg, TO_FLOAT);
|
||||
VCVT(fpScratchReg2, fpScratchReg2, TO_FLOAT);
|
||||
VSTR(fpScratchReg, dstReg, dec_->decFmt.uvoff);
|
||||
VSTR(fpScratchReg2, dstReg, dec_->decFmt.uvoff + 4);
|
||||
}
|
||||
}
|
||||
|
||||
void VertexDecoderJitCache::Jit_TcU16Prescale() {
|
||||
if (cpu_info.bNEON) {
|
||||
// TODO: Needs testing
|
||||
@ -673,6 +697,27 @@ void VertexDecoderJitCache::Jit_TcU16Prescale() {
|
||||
}
|
||||
}
|
||||
|
||||
void VertexDecoderJitCache::Jit_TcU16ToFloat() {
|
||||
if (cpu_info.bNEON) {
|
||||
// TODO: Needs testing
|
||||
ADD(scratchReg, srcReg, dec_->tcoff);
|
||||
VLD1_lane(I_32, neonScratchReg, scratchReg, 0, false);
|
||||
VMOVL(I_16 | I_UNSIGNED, neonScratchRegQ, neonScratchReg); // Widen to 32-bit
|
||||
VCVT(F_32 | I_UNSIGNED, neonScratchRegQ, neonScratchRegQ);
|
||||
ADD(scratchReg2, dstReg, dec_->decFmt.uvoff);
|
||||
VST1(F_32, neonScratchReg, scratchReg2, 1, ALIGN_NONE);
|
||||
} else {
|
||||
LDRH(tempReg1, srcReg, dec_->tcoff);
|
||||
LDRH(tempReg2, srcReg, dec_->tcoff + 2);
|
||||
VMOV(fpScratchReg, tempReg1);
|
||||
VMOV(fpScratchReg2, tempReg2);
|
||||
VCVT(fpScratchReg, fpScratchReg, TO_FLOAT);
|
||||
VCVT(fpScratchReg2, fpScratchReg2, TO_FLOAT);
|
||||
VSTR(fpScratchReg, dstReg, dec_->decFmt.uvoff);
|
||||
VSTR(fpScratchReg2, dstReg, dec_->decFmt.uvoff + 4);
|
||||
}
|
||||
}
|
||||
|
||||
void VertexDecoderJitCache::Jit_TcFloatPrescale() {
|
||||
if (cpu_info.bNEON) {
|
||||
ADD(scratchReg, srcReg, dec_->tcoff);
|
||||
|
@ -92,11 +92,15 @@ static const JitLookup jitLookup[] = {
|
||||
{&VertexDecoder::Step_WeightsU16Skin, &VertexDecoderJitCache::Jit_WeightsU16Skin},
|
||||
{&VertexDecoder::Step_WeightsFloatSkin, &VertexDecoderJitCache::Jit_WeightsFloatSkin},
|
||||
|
||||
{&VertexDecoder::Step_TcU8ToFloat, &VertexDecoderJitCache::Jit_TcU8ToFloat},
|
||||
{&VertexDecoder::Step_TcU16ToFloat, &VertexDecoderJitCache::Jit_TcU16ToFloat},
|
||||
{&VertexDecoder::Step_TcFloat, &VertexDecoderJitCache::Jit_TcFloat},
|
||||
{&VertexDecoder::Step_TcU16Double, &VertexDecoderJitCache::Jit_TcU16Double},
|
||||
|
||||
{&VertexDecoder::Step_TcU8Prescale, &VertexDecoderJitCache::Jit_TcU8Prescale},
|
||||
{&VertexDecoder::Step_TcU16Prescale, &VertexDecoderJitCache::Jit_TcU16Prescale},
|
||||
{&VertexDecoder::Step_TcFloatPrescale, &VertexDecoderJitCache::Jit_TcFloatPrescale},
|
||||
|
||||
{&VertexDecoder::Step_TcU16Through, &VertexDecoderJitCache::Jit_TcU16Through},
|
||||
{&VertexDecoder::Step_TcFloatThrough, &VertexDecoderJitCache::Jit_TcFloatThrough},
|
||||
{&VertexDecoder::Step_TcU16ThroughDouble, &VertexDecoderJitCache::Jit_TcU16ThroughDouble},
|
||||
@ -631,6 +635,14 @@ void VertexDecoderJitCache::Jit_TcU8Prescale() {
|
||||
fp.STUR(64, neonScratchRegD, dstReg, dec_->decFmt.uvoff);
|
||||
}
|
||||
|
||||
void VertexDecoderJitCache::Jit_TcU8ToFloat() {
|
||||
fp.LDUR(16, neonScratchRegD, srcReg, dec_->tcoff);
|
||||
fp.UXTL(8, neonScratchRegQ, neonScratchRegD); // Widen to 16-bit
|
||||
fp.UXTL(16, neonScratchRegQ, neonScratchRegD); // Widen to 32-bit
|
||||
fp.UCVTF(32, neonScratchRegD, neonScratchRegD);
|
||||
fp.STUR(64, neonScratchRegD, dstReg, dec_->decFmt.uvoff);
|
||||
}
|
||||
|
||||
void VertexDecoderJitCache::Jit_TcU16Prescale() {
|
||||
fp.LDUR(32, neonScratchRegD, srcReg, dec_->tcoff);
|
||||
fp.UXTL(16, neonScratchRegQ, neonScratchRegD); // Widen to 32-bit
|
||||
@ -640,6 +652,13 @@ void VertexDecoderJitCache::Jit_TcU16Prescale() {
|
||||
fp.STUR(64, neonScratchRegD, dstReg, dec_->decFmt.uvoff);
|
||||
}
|
||||
|
||||
void VertexDecoderJitCache::Jit_TcU16ToFloat() {
|
||||
fp.LDUR(32, neonScratchRegD, srcReg, dec_->tcoff);
|
||||
fp.UXTL(16, neonScratchRegQ, neonScratchRegD); // Widen to 32-bit
|
||||
fp.UCVTF(32, neonScratchRegD, neonScratchRegD);
|
||||
fp.STUR(64, neonScratchRegD, dstReg, dec_->decFmt.uvoff);
|
||||
}
|
||||
|
||||
void VertexDecoderJitCache::Jit_TcFloatPrescale() {
|
||||
fp.LDUR(64, neonScratchRegD, srcReg, dec_->tcoff);
|
||||
fp.FMUL(32, neonScratchRegD, neonScratchRegD, neonUVScaleReg); // TODO: FMLA
|
||||
|
@ -106,6 +106,11 @@ static const JitLookup jitLookup[] = {
|
||||
{&VertexDecoder::Step_TcU16Prescale, &VertexDecoderJitCache::Jit_TcU16Prescale},
|
||||
{&VertexDecoder::Step_TcFloatPrescale, &VertexDecoderJitCache::Jit_TcFloatPrescale},
|
||||
|
||||
{&VertexDecoder::Step_TcU16Through, &VertexDecoderJitCache::Jit_TcU16Through},
|
||||
{&VertexDecoder::Step_TcU16ThroughToFloat, &VertexDecoderJitCache::Jit_TcU16ThroughToFloat},
|
||||
{&VertexDecoder::Step_TcFloatThrough, &VertexDecoderJitCache::Jit_TcFloatThrough},
|
||||
{&VertexDecoder::Step_TcU16ThroughDouble, &VertexDecoderJitCache::Jit_TcU16ThroughDouble},
|
||||
|
||||
{&VertexDecoder::Step_TcU8MorphToFloat, &VertexDecoderJitCache::Jit_TcU8MorphToFloat},
|
||||
{&VertexDecoder::Step_TcU16MorphToFloat, &VertexDecoderJitCache::Jit_TcU16MorphToFloat},
|
||||
{&VertexDecoder::Step_TcFloatMorph, &VertexDecoderJitCache::Jit_TcFloatMorph},
|
||||
@ -113,11 +118,6 @@ static const JitLookup jitLookup[] = {
|
||||
{&VertexDecoder::Step_TcU16PrescaleMorph, &VertexDecoderJitCache::Jit_TcU16PrescaleMorph},
|
||||
{&VertexDecoder::Step_TcFloatPrescaleMorph, &VertexDecoderJitCache::Jit_TcFloatPrescaleMorph},
|
||||
|
||||
{&VertexDecoder::Step_TcU16Through, &VertexDecoderJitCache::Jit_TcU16Through},
|
||||
{&VertexDecoder::Step_TcU16ThroughToFloat, &VertexDecoderJitCache::Jit_TcU16ThroughToFloat},
|
||||
{&VertexDecoder::Step_TcFloatThrough, &VertexDecoderJitCache::Jit_TcFloatThrough},
|
||||
{&VertexDecoder::Step_TcU16ThroughDouble, &VertexDecoderJitCache::Jit_TcU16ThroughDouble},
|
||||
|
||||
{&VertexDecoder::Step_NormalS8, &VertexDecoderJitCache::Jit_NormalS8},
|
||||
{&VertexDecoder::Step_NormalS8ToFloat, &VertexDecoderJitCache::Jit_NormalS8ToFloat},
|
||||
{&VertexDecoder::Step_NormalS16, &VertexDecoderJitCache::Jit_NormalS16},
|
||||
@ -244,6 +244,7 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
|
||||
JumpTarget loopStart = GetCodePtr();
|
||||
for (int i = 0; i < dec.numSteps_; i++) {
|
||||
if (!CompileStep(dec, i)) {
|
||||
EndWrite();
|
||||
// Reset the code ptr and return zero to indicate that we failed.
|
||||
SetCodePtr(const_cast<u8 *>(start));
|
||||
return 0;
|
||||
|
@ -128,6 +128,10 @@ DrawEngineGLES::DrawEngineGLES()
|
||||
dcid_(0),
|
||||
fboTexNeedBind_(false),
|
||||
fboTexBound_(false) {
|
||||
|
||||
decOptions_.expandAllWeightsToFloat = false;
|
||||
decOptions_.expand8BitNormalsToFloat = false;
|
||||
|
||||
decimationCounter_ = VERTEXCACHE_DECIMATION_INTERVAL;
|
||||
bufferDecimationCounter_ = VERTEXCACHE_NAME_DECIMATION_INTERVAL;
|
||||
// Allocate nicely aligned memory. Maybe graphics drivers will
|
||||
|
Loading…
x
Reference in New Issue
Block a user