mirror of
https://github.com/libretro/pcsx2.git
synced 2025-01-16 22:48:36 +00:00
33adabb035
In the future these will be _M_X86_64, but for now this won't be the case.
229 lines
5.4 KiB
C++
229 lines
5.4 KiB
C++
/*
|
|
* Copyright (C) 2007-2009 Gabest
|
|
* http://www.gabest.org
|
|
*
|
|
* This Program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2, or (at your option)
|
|
* any later version.
|
|
*
|
|
* This Program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with GNU Make; see the file COPYING. If not, write to
|
|
* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA.
|
|
* http://www.gnu.org/copyleft/gpl.html
|
|
*
|
|
*/
|
|
|
|
// TODO: x64
|
|
|
|
#include "stdafx.h"
|
|
#include "GPUSetupPrimCodeGenerator.h"
|
|
#include "GSVertexSW.h"
|
|
|
|
using namespace Xbyak;
|
|
|
|
// Byte offsets, relative to esp, at which the code emitted by Generate()
// reads its inputs. Everything is expressed relative to _args.
// NOTE(review): the 4-byte gap between _args and _vertex is presumably the
// return address of a 32-bit call frame - confirm against the caller.
static const int _args   = 0;
static const int _vertex = _args + 4;  // pointer that the scaled vertex index is added to (GSVertexSW array base)
static const int _index  = _args + 8;  // pointer to the uint32 index list; Generate() reads element [1]
static const int _dscan  = _args + 12; // pointer to the GSVertexSW whose t and c members are read as dscan.t / dscan.c
|
|
|
|
// Builds the setup-primitive routine as part of construction.
//
// param   - treated as a GPUScanlineLocalData*; m_local is bound to the object it points to
// key     - selector value, stored in m_sel.key and consulted by Generate()
// code    - forwarded unchanged to the GSCodeGenerator base
// maxsize - forwarded unchanged to the GSCodeGenerator base
GPUSetupPrimCodeGenerator::GPUSetupPrimCodeGenerator(void* param, uint32 key, void* code, size_t maxsize)
	: GSCodeGenerator(code, maxsize),
	  m_local(*static_cast<GPUScanlineLocalData*>(param))
{
	// The selector has to be in place first: Generate() branches on m_sel.
	m_sel.key = key;

	Generate();
}
|
|
|
|
void GPUSetupPrimCodeGenerator::Generate()
|
|
{
|
|
if(m_sel.tme && !m_sel.twin)
|
|
{
|
|
pcmpeqd(xmm0, xmm0);
|
|
|
|
if(m_sel.sprite)
|
|
{
|
|
// t = (GSVector4i(vertices[1].t) >> 8) - GSVector4i::x00000001();
|
|
|
|
mov(ecx, ptr[esp + _index]);
|
|
mov(ecx, ptr[ecx + sizeof(uint32) * 1]);
|
|
shl(ecx, 6); // * sizeof(GSVertexSW)
|
|
add(ecx, ptr[esp + _vertex]);
|
|
|
|
cvttps2dq(xmm1, ptr[ecx + offsetof(GSVertexSW, t)]);
|
|
psrld(xmm1, 8);
|
|
psrld(xmm0, 31);
|
|
psubd(xmm1, xmm0);
|
|
|
|
// t = t.ps32(t);
|
|
// t = t.upl16(t);
|
|
|
|
packssdw(xmm1, xmm1);
|
|
punpcklwd(xmm1, xmm1);
|
|
|
|
// m_local.twin[2].u = t.xxxx();
|
|
// m_local.twin[2].v = t.yyyy();
|
|
|
|
pshufd(xmm2, xmm1, _MM_SHUFFLE(0, 0, 0, 0));
|
|
pshufd(xmm3, xmm1, _MM_SHUFFLE(1, 1, 1, 1));
|
|
|
|
movdqa(ptr[&m_local.twin[2].u], xmm2);
|
|
movdqa(ptr[&m_local.twin[2].v], xmm3);
|
|
}
|
|
else
|
|
{
|
|
// TODO: not really needed
|
|
|
|
// m_local.twin[2].u = GSVector4i::x00ff();
|
|
// m_local.twin[2].v = GSVector4i::x00ff();
|
|
|
|
psrlw(xmm0, 8);
|
|
|
|
movdqa(ptr[&m_local.twin[2].u], xmm0);
|
|
movdqa(ptr[&m_local.twin[2].v], xmm0);
|
|
}
|
|
}
|
|
|
|
if(m_sel.tme || m_sel.iip && m_sel.tfx != 3)
|
|
{
|
|
mov(edx, dword[esp + _dscan]);
|
|
|
|
for(int i = 0; i < 3; i++)
|
|
{
|
|
movaps(Xmm(5 + i), ptr[&m_shift[i]]);
|
|
}
|
|
|
|
// GSVector4 dt = dscan.t;
|
|
// GSVector4 dc = dscan.c;
|
|
|
|
movaps(xmm4, ptr[edx + offsetof(GSVertexSW, c)]);
|
|
movaps(xmm3, ptr[edx + offsetof(GSVertexSW, t)]);
|
|
|
|
// GSVector4i dtc8 = GSVector4i(dt * 8.0f).ps32(GSVector4i(dc * 8.0f));
|
|
|
|
movaps(xmm1, xmm3);
|
|
mulps(xmm1, xmm5);
|
|
cvttps2dq(xmm1, xmm1);
|
|
movaps(xmm2, xmm4);
|
|
mulps(xmm2, xmm5);
|
|
cvttps2dq(xmm2, xmm2);
|
|
packssdw(xmm1, xmm2);
|
|
|
|
if(m_sel.tme)
|
|
{
|
|
// m_local.d8.st = dtc8.upl16(dtc8);
|
|
|
|
movdqa(xmm0, xmm1);
|
|
punpcklwd(xmm0, xmm0);
|
|
movdqa(ptr[&m_local.d8.st], xmm0);
|
|
}
|
|
|
|
if(m_sel.iip && m_sel.tfx != 3)
|
|
{
|
|
// m_local.d8.c = dtc8.uph16(dtc8);
|
|
|
|
punpckhwd(xmm1, xmm1);
|
|
movdqa(ptr[&m_local.d8.c], xmm1);
|
|
}
|
|
|
|
// xmm3 = dt
|
|
// xmm4 = dc
|
|
// xmm6 = ps0123
|
|
// xmm7 = ps4567
|
|
// xmm0, xmm1, xmm2, xmm5 = free
|
|
|
|
if(m_sel.tme)
|
|
{
|
|
// GSVector4 dtx = dt.xxxx();
|
|
// GSVector4 dty = dt.yyyy();
|
|
|
|
movaps(xmm0, xmm3);
|
|
shufps(xmm3, xmm3, _MM_SHUFFLE(0, 0, 0, 0));
|
|
shufps(xmm0, xmm0, _MM_SHUFFLE(1, 1, 1, 1));
|
|
|
|
// m_local.d.s = GSVector4i(dtx * ps0123).ps32(GSVector4i(dtx * ps4567));
|
|
|
|
movaps(xmm1, xmm3);
|
|
mulps(xmm3, xmm6);
|
|
mulps(xmm1, xmm7);
|
|
cvttps2dq(xmm3, xmm3);
|
|
cvttps2dq(xmm1, xmm1);
|
|
packssdw(xmm3, xmm1);
|
|
movdqa(ptr[&m_local.d.s], xmm3);
|
|
|
|
// m_local.d.t = GSVector4i(dty * ps0123).ps32(GSVector4i(dty * ps4567));
|
|
|
|
movaps(xmm1, xmm0);
|
|
mulps(xmm0, xmm6);
|
|
mulps(xmm1, xmm7);
|
|
cvttps2dq(xmm0, xmm0);
|
|
cvttps2dq(xmm1, xmm1);
|
|
packssdw(xmm0, xmm1);
|
|
movdqa(ptr[&m_local.d.t], xmm0);
|
|
}
|
|
|
|
// xmm4 = dc
|
|
// xmm6 = ps0123
|
|
// xmm7 = ps4567
|
|
// xmm0, xmm1, zmm2, xmm3, xmm5 = free
|
|
|
|
if(m_sel.iip && m_sel.tfx != 3)
|
|
{
|
|
// GSVector4 dcx = dc.xxxx();
|
|
// GSVector4 dcy = dc.yyyy();
|
|
// GSVector4 dcz = dc.zzzz();
|
|
|
|
movaps(xmm0, xmm4);
|
|
movaps(xmm1, xmm4);
|
|
shufps(xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0));
|
|
shufps(xmm0, xmm0, _MM_SHUFFLE(1, 1, 1, 1));
|
|
shufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
|
|
|
|
// m_local.d.r = GSVector4i(dcx * ps0123).ps32(GSVector4i(dcx * ps4567));
|
|
|
|
movaps(xmm2, xmm4);
|
|
mulps(xmm4, xmm6);
|
|
mulps(xmm2, xmm7);
|
|
cvttps2dq(xmm4, xmm4);
|
|
cvttps2dq(xmm2, xmm2);
|
|
packssdw(xmm4, xmm2);
|
|
movdqa(ptr[&m_local.d.r], xmm4);
|
|
|
|
// m_local.d.g = GSVector4i(dcy * ps0123).ps32(GSVector4i(dcy * ps4567));
|
|
|
|
movaps(xmm2, xmm0);
|
|
mulps(xmm0, xmm6);
|
|
mulps(xmm2, xmm7);
|
|
cvttps2dq(xmm0, xmm0);
|
|
cvttps2dq(xmm2, xmm2);
|
|
packssdw(xmm0, xmm2);
|
|
movdqa(ptr[&m_local.d.g], xmm0);
|
|
|
|
// m_local.d.b = GSVector4i(dcz * ps0123).ps32(GSVector4i(dcz * ps4567));
|
|
|
|
movaps(xmm2, xmm1);
|
|
mulps(xmm1, xmm6);
|
|
mulps(xmm2, xmm7);
|
|
cvttps2dq(xmm1, xmm1);
|
|
cvttps2dq(xmm2, xmm2);
|
|
packssdw(xmm1, xmm2);
|
|
movdqa(ptr[&m_local.d.b], xmm1);
|
|
}
|
|
}
|
|
|
|
ret();
|
|
}
|
|
|
|
// Constant vectors that Generate() loads into xmm5, xmm6 and xmm7 (in that
// order) before computing the step values.
const GSVector4 GPUSetupPrimCodeGenerator::m_shift[3] =
{
	GSVector4(8.0f, 8.0f, 8.0f, 8.0f), // [0] multiplier for the "* 8.0f" (d8) steps
	GSVector4(0.0f, 1.0f, 2.0f, 3.0f), // [1] ps0123: multipliers for the first four lanes
	GSVector4(4.0f, 5.0f, 6.0f, 7.0f), // [2] ps4567: multipliers for the last four lanes
};
|