mirror of
https://github.com/libretro/ppsspp.git
synced 2025-01-20 23:54:48 +00:00
Merge remote-tracking branch 'upstream/through-z-unsigned' into through-z
This commit is contained in:
commit
eaa10de9af
@ -462,9 +462,10 @@ void VertexDecoder::Step_PosS16Through() const
|
||||
{
|
||||
float *v = (float *)(decoded_ + decFmt.posoff);
|
||||
const s16 *sv = (const s16*)(ptr_ + posoff);
|
||||
const u16 *uv = (const u16*)(ptr_ + posoff);
|
||||
v[0] = sv[0];
|
||||
v[1] = sv[1];
|
||||
v[2] = sv[2];
|
||||
v[2] = uv[2];
|
||||
}
|
||||
|
||||
void VertexDecoder::Step_PosFloatThrough() const
|
||||
|
@ -723,7 +723,7 @@ void VertexDecoderJitCache::Jit_PosS16Through() {
|
||||
// TODO: SIMD
|
||||
LDRSH(tempReg1, srcReg, dec_->posoff);
|
||||
LDRSH(tempReg2, srcReg, dec_->posoff + 2);
|
||||
LDRSH(tempReg3, srcReg, dec_->posoff + 4);
|
||||
LDRH(tempReg3, srcReg, dec_->posoff + 4);
|
||||
static const ARMReg tr[3] = { tempReg1, tempReg2, tempReg3 };
|
||||
for (int i = 0; i < 3; i++) {
|
||||
VMOV(fpScratchReg, tr[i]);
|
||||
|
@ -765,6 +765,9 @@ void VertexDecoderJitCache::Jit_PosS8Through() {
|
||||
|
||||
// Through expands into floats, always. Might want to look at changing this.
|
||||
void VertexDecoderJitCache::Jit_PosS16Through() {
|
||||
// This commented out version is likely slightly faster but treats all three as signed, which
|
||||
// appears to be wrong.
|
||||
/*
|
||||
XORPS(XMM3, R(XMM3));
|
||||
MOVQ_xmm(XMM1, MDisp(srcReg, dec_->posoff));
|
||||
PUNPCKLWD(XMM1, R(XMM3));
|
||||
@ -772,6 +775,16 @@ void VertexDecoderJitCache::Jit_PosS16Through() {
|
||||
PSRAD(XMM1, 16); // Ugly sign extension, can be done faster in SSE4
|
||||
CVTDQ2PS(XMM3, R(XMM1));
|
||||
MOVUPS(MDisp(dstReg, dec_->decFmt.posoff), XMM3);
|
||||
*/
|
||||
MOVSX(32, 16, tempReg1, MDisp(srcReg, dec_->posoff));
|
||||
MOVSX(32, 16, tempReg2, MDisp(srcReg, dec_->posoff + 2));
|
||||
MOVZX(32, 16, tempReg3, MDisp(srcReg, dec_->posoff + 4)); // NOTE: MOVZX
|
||||
CVTSI2SS(fpScratchReg, R(tempReg1));
|
||||
MOVSS(MDisp(dstReg, dec_->decFmt.posoff), fpScratchReg);
|
||||
CVTSI2SS(fpScratchReg, R(tempReg2));
|
||||
MOVSS(MDisp(dstReg, dec_->decFmt.posoff + 4), fpScratchReg);
|
||||
CVTSI2SS(fpScratchReg, R(tempReg3));
|
||||
MOVSS(MDisp(dstReg, dec_->decFmt.posoff + 8), fpScratchReg);
|
||||
}
|
||||
|
||||
// Copy 3 bytes and then a zero. Might as well copy four.
|
||||
|
Loading…
x
Reference in New Issue
Block a user