Merge remote-tracking branch 'upstream/through-z-unsigned' into through-z

This commit is contained in:
Unknown W. Brackets 2014-02-13 07:40:55 -08:00
commit eaa10de9af
3 changed files with 16 additions and 2 deletions

View File

@ -462,9 +462,10 @@ void VertexDecoder::Step_PosS16Through() const
{
// Reads three 16-bit position components from ptr_ + posoff and writes them
// as floats to decoded_ + decFmt.posoff.
float *v = (float *)(decoded_ + decFmt.posoff);
// Two views of the same source bytes: sv as s16, uv as u16.
const s16 *sv = (const s16*)(ptr_ + posoff);
const u16 *uv = (const u16*)(ptr_ + posoff);
// The first two components are taken from the s16 view.
v[0] = sv[0];
v[1] = sv[1];
// NOTE(review): this store is overwritten by the uv[2] store directly below;
// presumably it is the pre-change line of this diff hunk - confirm.
v[2] = sv[2];
// The third component is taken from the u16 view instead of the s16 view.
v[2] = uv[2];
}
void VertexDecoder::Step_PosFloatThrough() const

View File

@ -723,7 +723,7 @@ void VertexDecoderJitCache::Jit_PosS16Through() {
// TODO: SIMD
LDRSH(tempReg1, srcReg, dec_->posoff);
LDRSH(tempReg2, srcReg, dec_->posoff + 2);
LDRSH(tempReg3, srcReg, dec_->posoff + 4);
LDRH(tempReg3, srcReg, dec_->posoff + 4);
static const ARMReg tr[3] = { tempReg1, tempReg2, tempReg3 };
for (int i = 0; i < 3; i++) {
VMOV(fpScratchReg, tr[i]);

View File

@ -765,6 +765,9 @@ void VertexDecoderJitCache::Jit_PosS8Through() {
// Through expands into floats, always. Might want to look at changing this.
void VertexDecoderJitCache::Jit_PosS16Through() {
// Emits a scalar sequence that loads three 16-bit position components from
// srcReg + posoff (+0, +2, +4), converts each to a float, and stores them at
// dstReg + decFmt.posoff (+0, +4, +8).
// This commented out version is likely slightly faster but treats all three as signed, which
// appears to be wrong.
// NOTE(review): the disabled sequence below is interrupted by a diff hunk
// marker, so it may not be the complete original instruction list.
/*
XORPS(XMM3, R(XMM3));
MOVQ_xmm(XMM1, MDisp(srcReg, dec_->posoff));
PUNPCKLWD(XMM1, R(XMM3));
@ -772,6 +775,16 @@ void VertexDecoderJitCache::Jit_PosS16Through() {
PSRAD(XMM1, 16); // Ugly sign extension, can be done faster in SSE4
CVTDQ2PS(XMM3, R(XMM1));
MOVUPS(MDisp(dstReg, dec_->decFmt.posoff), XMM3);
*/
// The components at +0 and +2 are widened to 32 bits with sign extension (MOVSX).
MOVSX(32, 16, tempReg1, MDisp(srcReg, dec_->posoff));
MOVSX(32, 16, tempReg2, MDisp(srcReg, dec_->posoff + 2));
// The component at +4 is zero-extended instead, so it is not treated as signed.
MOVZX(32, 16, tempReg3, MDisp(srcReg, dec_->posoff + 4)); // NOTE: MOVZX
// Convert each widened integer to a float and store it, one component at a
// time, reusing fpScratchReg for every conversion.
CVTSI2SS(fpScratchReg, R(tempReg1));
MOVSS(MDisp(dstReg, dec_->decFmt.posoff), fpScratchReg);
CVTSI2SS(fpScratchReg, R(tempReg2));
MOVSS(MDisp(dstReg, dec_->decFmt.posoff + 4), fpScratchReg);
CVTSI2SS(fpScratchReg, R(tempReg3));
MOVSS(MDisp(dstReg, dec_->decFmt.posoff + 8), fpScratchReg);
}
// Copy 3 bytes and then a zero. Might as well copy four.