mirror of
https://github.com/FEX-Emu/FEX.git
synced 2024-12-14 01:18:46 +00:00
Adds new IR ops to interpreter and x86 JIT
This commit is contained in:
parent
3bad1eafbc
commit
c336a09641
@ -2136,6 +2136,155 @@ void InterpreterCore::ExecuteCode(FEXCore::Core::InternalThreadState *Thread) {
|
||||
memcpy(GDP, Tmp, Op->RegisterSize);
|
||||
break;
|
||||
}
|
||||
// Expands to a single `case size:` arm of an element-size switch.
// The arm views `Tmp` as an array of `type` (destination) and `Src` as an
// array of `type2` (source), then stores `(type)func(source lane, max)` into
// each of the first `Elements` destination lanes.
// Expects `Tmp`, `Src` and `Elements` to be in scope at the expansion site.
#define DO_VECTOR_1SRC_2TYPE_OP(size, type, type2, func, max) \
  case size: { \
    auto *Result_d = reinterpret_cast<type*>(Tmp); \
    auto *Input_d = reinterpret_cast<type2*>(Src); \
    uint8_t Lane = 0; \
    while (Lane < Elements) { \
      Result_d[Lane] = (type)func(Input_d[Lane], max); \
      ++Lane; \
    } \
    break; \
  }
|
||||
// Expands to a single `case size:` arm of an element-size switch for the
// "write the top half" form of a two-type vector op.
// The arm views `Tmp` as an array of `type` (destination) and `Src2` as an
// array of `type2` (source). The first `Elements` destination lanes are copied
// unchanged from `Src1`; lanes [Elements, 2*Elements) receive
// `(type)func(Src2 lane, max)`.
// Expects `Tmp`, `Src1`, `Src2` and `Elements` to be in scope at the
// expansion site.
#define DO_VECTOR_1SRC_2TYPE_OP_TOP(size, type, type2, func, max) \
  case size: { \
    auto *Dst_d = reinterpret_cast<type*>(Tmp); \
    auto *Src_d = reinterpret_cast<type2*>(Src2); \
    /* The preserved lower half consists of destination lanes, so it must */ \
    /* be sized with the destination type, not the source type. */ \
    memcpy(Dst_d, Src1, Elements * sizeof(type)); \
    for (uint8_t i = 0; i < Elements; ++i) { \
      Dst_d[i + Elements] = (type)func(Src_d[i], max); \
    } \
    break; \
  }
|
||||
|
||||
case IR::OP_VSQXTUN: {
  // Narrowing with unsigned saturation: every signed source lane is clamped
  // to [0, max of the half-width unsigned type] and stored as that narrower
  // unsigned type.
  // NOTE(review): the case labels (2 -> 8-bit result, 4 -> 16-bit result)
  // imply ElementSize is the *source* lane width - confirm against the
  // dispatcher.
  auto Op = IROp->C<IR::IROp_VSQXTUN>();
  void *Src = GetSrc<void*>(Op->Header.Args[0]);
  // Zero-initialised: the narrowed lanes only fill the lower half of the
  // register, and the full RegisterSize is copied out below.
  uint8_t Tmp[16]{};

  // Number of source lanes, which equals the number of narrowed result lanes.
  uint8_t Elements = Op->RegisterSize / Op->ElementSize;

  auto Func = [](auto a, auto max) { return std::max(std::min(a, (decltype(a))max), (decltype(a))0); };
  switch (Op->ElementSize) {
    // Macro argument order is (size, destination type, source type, ...).
    // The source must be the wide signed type so the clamp can actually
    // observe negative and over-range values.
    DO_VECTOR_1SRC_2TYPE_OP(2, uint8_t, int16_t, Func, (1 << 8) - 1)
    DO_VECTOR_1SRC_2TYPE_OP(4, uint16_t, int32_t, Func, (1 << 16) - 1)
    default: LogMan::Msg::A("Unknown Element Size: %d", Op->ElementSize); break;
  }
  memcpy(GDP, Tmp, Op->RegisterSize);
  break;
}
|
||||
case IR::OP_VSQXTUN2: {
  // "Top half" variant of the unsigned-saturating narrow: the lower result
  // lanes are taken unchanged from the first operand, and the upper lanes
  // receive the second operand's signed lanes clamped to
  // [0, max of the half-width unsigned type].
  // NOTE(review): the case labels imply ElementSize is the *source* lane
  // width - confirm against the dispatcher.
  auto Op = IROp->C<IR::IROp_VSQXTUN2>();
  void *Src1 = GetSrc<void*>(Op->Header.Args[0]);
  void *Src2 = GetSrc<void*>(Op->Header.Args[1]);
  uint8_t Tmp[16]{};

  // Number of source lanes in the second operand (= narrowed lanes produced).
  uint8_t Elements = Op->RegisterSize / Op->ElementSize;

  auto Func = [](auto a, auto max) { return std::max(std::min(a, (decltype(a))max), (decltype(a))0); };
  switch (Op->ElementSize) {
    // Macro argument order is (size, destination type, source type, ...).
    // Using the narrow type as destination keeps the writes at
    // Dst[Elements .. 2*Elements) inside the 16-byte Tmp buffer; a wide
    // destination type would index past its end.
    DO_VECTOR_1SRC_2TYPE_OP_TOP(2, uint8_t, int16_t, Func, (1 << 8) - 1)
    DO_VECTOR_1SRC_2TYPE_OP_TOP(4, uint16_t, int32_t, Func, (1 << 16) - 1)
    default: LogMan::Msg::A("Unknown Element Size: %d", Op->ElementSize); break;
  }
  memcpy(GDP, Tmp, Op->RegisterSize);
  break;
}
|
||||
case IR::OP_VUCVTF: {
  // Lane-wise conversion of unsigned integers to floating point of the same
  // width (uint32_t -> float, uint64_t -> double).
  auto ConvertOp = IROp->C<IR::IROp_VUCVTF>();
  const uint8_t Elements = ConvertOp->RegisterSize / ConvertOp->ElementSize;
  void *Src = GetSrc<void*>(ConvertOp->Header.Args[0]);
  uint8_t Tmp[16];

  // The conversion itself is the (type) cast inside the macro; the functor
  // only forwards the lane value and ignores the limit argument.
  auto PassThrough = [](auto Value, auto) { return Value; };
  switch (ConvertOp->ElementSize) {
    DO_VECTOR_1SRC_2TYPE_OP(4, float, uint32_t, PassThrough, 0)
    DO_VECTOR_1SRC_2TYPE_OP(8, double, uint64_t, PassThrough, 0)
    default: LogMan::Msg::A("Unknown Element Size: %d", ConvertOp->ElementSize); break;
  }
  memcpy(GDP, Tmp, ConvertOp->RegisterSize);
  break;
}
|
||||
case IR::OP_VSCVTF: {
  // Lane-wise conversion of signed integers to floating point of the same
  // width (int32_t -> float, int64_t -> double).
  auto ConvertOp = IROp->C<IR::IROp_VSCVTF>();
  const uint8_t Elements = ConvertOp->RegisterSize / ConvertOp->ElementSize;
  void *Src = GetSrc<void*>(ConvertOp->Header.Args[0]);
  uint8_t Tmp[16];

  // The conversion itself is the (type) cast inside the macro; the functor
  // only forwards the lane value and ignores the limit argument.
  auto PassThrough = [](auto Value, auto) { return Value; };
  switch (ConvertOp->ElementSize) {
    DO_VECTOR_1SRC_2TYPE_OP(4, float, int32_t, PassThrough, 0)
    DO_VECTOR_1SRC_2TYPE_OP(8, double, int64_t, PassThrough, 0)
    default: LogMan::Msg::A("Unknown Element Size: %d", ConvertOp->ElementSize); break;
  }
  memcpy(GDP, Tmp, ConvertOp->RegisterSize);
  break;
}
|
||||
case IR::OP_VFCVTL: {
  // Widening float conversion: each float source lane becomes a double
  // result lane.
  auto Op = IROp->C<IR::IROp_VFCVTL>();
  void *Src = GetSrc<void*>(Op->Header.Args[0]);
  uint8_t Tmp[16]{};

  // Result lanes are twice as wide as source lanes, so only
  // RegisterSize / (2 * ElementSize) of them fit. Counting source lanes
  // instead would write 32 bytes into the 16-byte Tmp buffer for a 16-byte
  // register.
  uint8_t Elements = Op->RegisterSize / (Op->ElementSize << 1);

  auto Func = [](auto a, auto max) { return a; };
  switch (Op->ElementSize) {
    DO_VECTOR_1SRC_2TYPE_OP(4, double, float, Func, 0)
    default: LogMan::Msg::A("Unknown Element Size: %d", Op->ElementSize); break;
  }
  memcpy(GDP, Tmp, Op->RegisterSize);
  break;
}
|
||||
case IR::OP_VFCVTN: {
  // Narrowing float conversion: each double source lane becomes a float
  // result lane.
  auto Op = IROp->C<IR::IROp_VFCVTN>();
  void *Src = GetSrc<void*>(Op->Header.Args[0]);
  // Zero-initialised: the narrowed lanes only occupy the lower half of the
  // buffer, yet the full RegisterSize is copied out below. Without this the
  // upper half of the result would be indeterminate stack contents.
  uint8_t Tmp[16]{};

  // Number of source lanes, which equals the number of narrowed result lanes.
  uint8_t Elements = Op->RegisterSize / Op->ElementSize;

  auto Func = [](auto a, auto max) { return a; };
  switch (Op->ElementSize) {
    DO_VECTOR_1SRC_2TYPE_OP(8, float, double, Func, 0)
    default: LogMan::Msg::A("Unknown Element Size: %d", Op->ElementSize); break;
  }
  memcpy(GDP, Tmp, Op->RegisterSize);
  break;
}
|
||||
case IR::OP_VSXTL: {
  // Sign-extending widen: each signed source lane is converted to a signed
  // lane of twice the width.
  auto Op = IROp->C<IR::IROp_VSXTL>();
  void *Src = GetSrc<void*>(Op->Header.Args[0]);
  uint8_t Tmp[16]{};

  // Result lanes are twice as wide as source lanes, so only
  // RegisterSize / (2 * ElementSize) of them fit. Counting source lanes
  // instead would write up to 32 bytes into the 16-byte Tmp buffer.
  uint8_t Elements = Op->RegisterSize / (Op->ElementSize << 1);

  auto Func = [](auto a, auto max) { return a; };
  switch (Op->ElementSize) {
    DO_VECTOR_1SRC_2TYPE_OP(1, int16_t, int8_t, Func, 0)
    DO_VECTOR_1SRC_2TYPE_OP(2, int32_t, int16_t, Func, 0)
    DO_VECTOR_1SRC_2TYPE_OP(4, int64_t, int32_t, Func, 0)
    default: LogMan::Msg::A("Unknown Element Size: %d", Op->ElementSize); break;
  }
  memcpy(GDP, Tmp, Op->RegisterSize);
  break;
}
|
||||
case IR::OP_VUXTL: {
  // Zero-extending widen: each unsigned source lane is converted to an
  // unsigned lane of twice the width.
  auto Op = IROp->C<IR::IROp_VUXTL>();
  void *Src = GetSrc<void*>(Op->Header.Args[0]);
  uint8_t Tmp[16]{};

  // Result lanes are twice as wide as source lanes, so only
  // RegisterSize / (2 * ElementSize) of them fit. Counting source lanes
  // instead would write up to 32 bytes into the 16-byte Tmp buffer.
  uint8_t Elements = Op->RegisterSize / (Op->ElementSize << 1);

  auto Func = [](auto a, auto max) { return a; };
  switch (Op->ElementSize) {
    DO_VECTOR_1SRC_2TYPE_OP(1, uint16_t, uint8_t, Func, 0)
    DO_VECTOR_1SRC_2TYPE_OP(2, uint32_t, uint16_t, Func, 0)
    DO_VECTOR_1SRC_2TYPE_OP(4, uint64_t, uint32_t, Func, 0)
    default: LogMan::Msg::A("Unknown Element Size: %d", Op->ElementSize); break;
  }
  memcpy(GDP, Tmp, Op->RegisterSize);
  break;
}
|
||||
case IR::OP_VUMIN: {
|
||||
auto Op = IROp->C<IR::IROp_VUMin>();
|
||||
void *Src1 = GetSrc<void*>(Op->Header.Args[0]);
|
||||
@ -2561,6 +2710,22 @@ void InterpreterCore::ExecuteCode(FEXCore::Core::InternalThreadState *Thread) {
|
||||
memcpy(GDP, &Dst, Op->RegisterSize);
|
||||
break;
|
||||
}
|
||||
case IR::OP_VINSGPR: {
  // Insert a general-purpose value (second operand) into lane `Index` of the
  // vector given by the first operand; all other lanes are preserved.
  auto Op = IROp->C<IR::IROp_VInsGPR>();
  __uint128_t Src1 = *GetSrc<__uint128_t*>(Op->Header.Args[0]);
  __uint128_t Src2 = *GetSrc<__uint128_t*>(Op->Header.Args[1]);

  // Index counts lanes (the JIT passes it straight to pinsr{b,w,d,q}), so the
  // bit offset has to scale with the lane width.
  const uint64_t ElementBits = Op->ElementSize * 8;
  const uint64_t Offset = Op->Index * ElementBits;

  // Build the lane mask in 128 bits: with 8-byte lanes a 64-bit
  // `1ULL << 64` would be undefined behaviour.
  const __uint128_t ElementMask = (static_cast<__uint128_t>(1) << ElementBits) - 1;

  // Drop anything above the lane width so stale upper bits of the source
  // cannot leak into neighbouring lanes.
  Src2 &= ElementMask;

  __uint128_t Dst = Src1 & ~(ElementMask << Offset);
  Dst |= Src2 << Offset;

  memcpy(GDP, &Dst, Op->RegisterSize);
  break;
}
|
||||
|
||||
case IR::OP_SCVTF: {
|
||||
auto Op = IROp->C<IR::IROp_SCVTF>();
|
||||
if (Op->ElementSize == 8) {
|
||||
|
@ -1594,7 +1594,32 @@ void *JITCore::CompileCode([[maybe_unused]] FEXCore::IR::IRListView<true> const
|
||||
|
||||
break;
|
||||
}
|
||||
case IR::OP_VINSGPR: {
  // Insert a general-purpose value (second operand) into lane `Index` of the
  // vector given by the first operand.
  auto Op = IROp->C<IR::IROp_VInsGPR>();
  // Start from a copy of the vector operand; the pinsr* below then replaces
  // a single lane in place.
  movapd(GetDst(Node), GetSrc(Op->Header.Args[0].ID()));

  // The value being inserted is the second argument. The first argument is
  // the vector that was just copied and must not be read as a GPR.
  switch (Op->ElementSize) {
    case 1: {
      // PINSRB is encoded with an r32 (or m8) source; only the low byte is used.
      pinsrb(GetDst(Node), GetSrc<RA_32>(Op->Header.Args[1].ID()), Op->Index);
      break;
    }
    case 2: {
      // PINSRW is encoded with an r32 (or m16) source; only the low word is used.
      pinsrw(GetDst(Node), GetSrc<RA_32>(Op->Header.Args[1].ID()), Op->Index);
      break;
    }
    case 4: {
      pinsrd(GetDst(Node), GetSrc<RA_32>(Op->Header.Args[1].ID()), Op->Index);
      break;
    }
    case 8: {
      pinsrq(GetDst(Node), GetSrc<RA_64>(Op->Header.Args[1].ID()), Op->Index);
      break;
    }
    default: LogMan::Msg::A("Unknown Element Size: %d", Op->ElementSize); break;
  }

  break;
}
|
||||
case IR::OP_PRINT: {
|
||||
auto Op = IROp->C<IR::IROp_Print>();
|
||||
|
||||
@ -1957,6 +1982,71 @@ void *JITCore::CompileCode([[maybe_unused]] FEXCore::IR::IRListView<true> const
|
||||
}
|
||||
break;
|
||||
}
|
||||
case IR::OP_VFMAX: {
  // Floating-point maximum of two operands. When the element spans the whole
  // register the scalar instruction is emitted, otherwise the packed one.
  auto MaxOp = IROp->C<IR::IROp_VFMax>();
  const bool IsScalar = MaxOp->ElementSize == MaxOp->RegisterSize;

  switch (MaxOp->ElementSize) {
    case 4:
      if (IsScalar) {
        vmaxss(GetDst(Node), GetSrc(MaxOp->Header.Args[0].ID()), GetSrc(MaxOp->Header.Args[1].ID()));
      } else {
        vmaxps(GetDst(Node), GetSrc(MaxOp->Header.Args[0].ID()), GetSrc(MaxOp->Header.Args[1].ID()));
      }
      break;
    case 8:
      if (IsScalar) {
        vmaxsd(GetDst(Node), GetSrc(MaxOp->Header.Args[0].ID()), GetSrc(MaxOp->Header.Args[1].ID()));
      } else {
        vmaxpd(GetDst(Node), GetSrc(MaxOp->Header.Args[0].ID()), GetSrc(MaxOp->Header.Args[1].ID()));
      }
      break;
    default:
      LogMan::Msg::A("Unknown Element Size: %d", MaxOp->ElementSize);
      break;
  }
  break;
}
|
||||
case IR::OP_VFMIN: {
  // Floating-point minimum of two operands. When the element spans the whole
  // register the scalar instruction is emitted, otherwise the packed one.
  auto MinOp = IROp->C<IR::IROp_VFMin>();
  const bool IsScalar = MinOp->ElementSize == MinOp->RegisterSize;

  switch (MinOp->ElementSize) {
    case 4:
      if (IsScalar) {
        vminss(GetDst(Node), GetSrc(MinOp->Header.Args[0].ID()), GetSrc(MinOp->Header.Args[1].ID()));
      } else {
        vminps(GetDst(Node), GetSrc(MinOp->Header.Args[0].ID()), GetSrc(MinOp->Header.Args[1].ID()));
      }
      break;
    case 8:
      if (IsScalar) {
        vminsd(GetDst(Node), GetSrc(MinOp->Header.Args[0].ID()), GetSrc(MinOp->Header.Args[1].ID()));
      } else {
        vminpd(GetDst(Node), GetSrc(MinOp->Header.Args[0].ID()), GetSrc(MinOp->Header.Args[1].ID()));
      }
      break;
    default:
      LogMan::Msg::A("Unknown Element Size: %d", MinOp->ElementSize);
      break;
  }
  break;
}
|
||||
|
||||
case IR::OP_VFRECP: {
|
||||
auto Op = IROp->C<IR::IROp_VFRecp>();
|
||||
if (Op->ElementSize == Op->RegisterSize) {
|
||||
@ -2037,6 +2127,80 @@ void *JITCore::CompileCode([[maybe_unused]] FEXCore::IR::IRListView<true> const
|
||||
}
|
||||
break;
|
||||
}
|
||||
case IR::OP_VSXTL: {
  // Sign-extending widen; the pmovsx* variant is selected by the source
  // element size in bytes.
  auto ExtendOp = IROp->C<IR::IROp_VSXTL>();
  const auto SrcElementSize = ExtendOp->ElementSize;

  if (SrcElementSize == 1) {
    pmovsxbw(GetDst(Node), GetSrc(ExtendOp->Header.Args[0].ID()));
  } else if (SrcElementSize == 2) {
    pmovsxwd(GetDst(Node), GetSrc(ExtendOp->Header.Args[0].ID()));
  } else if (SrcElementSize == 4) {
    pmovsxdq(GetDst(Node), GetSrc(ExtendOp->Header.Args[0].ID()));
  } else {
    LogMan::Msg::A("Unknown element size: %d", ExtendOp->ElementSize);
  }
  break;
}
|
||||
case IR::OP_VUXTL: {
  // Zero-extending widen; the pmovzx* variant is selected by the source
  // element size in bytes.
  auto ExtendOp = IROp->C<IR::IROp_VUXTL>();
  const auto SrcElementSize = ExtendOp->ElementSize;

  if (SrcElementSize == 1) {
    pmovzxbw(GetDst(Node), GetSrc(ExtendOp->Header.Args[0].ID()));
  } else if (SrcElementSize == 2) {
    pmovzxwd(GetDst(Node), GetSrc(ExtendOp->Header.Args[0].ID()));
  } else if (SrcElementSize == 4) {
    pmovzxdq(GetDst(Node), GetSrc(ExtendOp->Header.Args[0].ID()));
  } else {
    LogMan::Msg::A("Unknown element size: %d", ExtendOp->ElementSize);
  }
  break;
}
|
||||
case IR::OP_VSCVTF: {
  // Lane-wise signed integer -> floating point conversion of the same width
  // (32-bit -> float, 64-bit -> double).
  auto Op = IROp->C<IR::IROp_VSCVTF>();
  switch (Op->ElementSize) {
    case 4:
      cvtdq2ps(GetDst(Node), GetSrc(Op->Header.Args[0].ID()));
      break;
    case 8:
      // This operation is a bit disgusting in x86
      // There is no vector form of this instruction until AVX512VL + AVX512DQ (vcvtqq2pd)
      // 1) First extract both 64-bit lanes into GPRs
      // 2) Do a scalar conversion on each
      // 3) Make sure to merge them together at the end
      pextrq(rax, GetSrc(Op->Header.Args[0].ID()), 1);
      pextrq(rcx, GetSrc(Op->Header.Args[0].ID()), 0);
      // 64-bit lanes convert to double, so the scalar-double form is
      // required; cvtsi2ss would produce 32-bit floats instead.
      cvtsi2sd(GetDst(Node), rcx);
      cvtsi2sd(xmm15, rax);
      // Move the converted high lane into the upper 64 bits of the result.
      movlhps(GetDst(Node), xmm15);
      break;
    default: LogMan::Msg::A("Unknown Element Size: %d", Op->ElementSize); break;
  }
  break;
}
|
||||
case IR::OP_VFCVTL: {
  // Widening float conversion; only 4-byte source elements are handled
  // (packed single -> packed double via cvtps2pd).
  auto WidenOp = IROp->C<IR::IROp_VFCVTL>();
  if (WidenOp->ElementSize == 4) {
    cvtps2pd(GetDst(Node), GetSrc(WidenOp->Header.Args[0].ID()));
  } else {
    LogMan::Msg::A("Unknown Element Size: %d", WidenOp->ElementSize);
  }
  break;
}
|
||||
case IR::OP_VFCVTN: {
  // Narrowing float conversion; only 8-byte source elements are handled
  // (packed double -> packed single via cvtpd2ps).
  auto NarrowOp = IROp->C<IR::IROp_VFCVTN>();
  if (NarrowOp->ElementSize == 8) {
    cvtpd2ps(GetDst(Node), GetSrc(NarrowOp->Header.Args[0].ID()));
  } else {
    LogMan::Msg::A("Unknown Element Size: %d", NarrowOp->ElementSize);
  }
  break;
}
|
||||
case IR::OP_VBITCAST: {
|
||||
auto Op = IROp->C<IR::IROp_VBitcast>();
|
||||
movaps(GetDst(Node), GetSrc(Op->Header.Args[0].ID()));
|
||||
|
@ -375,12 +375,27 @@ public:
|
||||
/// Sizes-first overload: forwards to the `_VNot` overload that takes the
/// SSA operand ahead of the register and element sizes.
IRPair<IROp_VNot> _VNot(uint8_t RegisterSize,
                        uint8_t ElementSize,
                        OrderedNode *ssa0) {
  return _VNot(ssa0, RegisterSize, ElementSize);
}
|
||||
/// Sizes-first overload: forwards to the `_VSQXTN` overload that takes the
/// SSA operand ahead of the register and element sizes.
IRPair<IROp_VSQXTN> _VSQXTN(uint8_t RegisterSize,
                            uint8_t ElementSize,
                            OrderedNode *ssa0) {
  return _VSQXTN(ssa0, RegisterSize, ElementSize);
}
|
||||
/// Sizes-first overload: forwards to the `_VSQXTN2` overload that takes the
/// two SSA operands ahead of the register and element sizes.
IRPair<IROp_VSQXTN2> _VSQXTN2(uint8_t RegisterSize,
                              uint8_t ElementSize,
                              OrderedNode *ssa0,
                              OrderedNode *ssa1) {
  return _VSQXTN2(ssa0, ssa1, RegisterSize, ElementSize);
}
|
||||
/// Sizes-first overload: forwards to the `_VSQXTUN` overload that takes the
/// SSA operand ahead of the register and element sizes.
IRPair<IROp_VSQXTUN> _VSQXTUN(uint8_t RegisterSize,
                              uint8_t ElementSize,
                              OrderedNode *ssa0) {
  return _VSQXTUN(ssa0, RegisterSize, ElementSize);
}
|
||||
/// Sizes-first overload: forwards to the `_VSQXTUN2` overload that takes the
/// two SSA operands ahead of the register and element sizes.
IRPair<IROp_VSQXTUN2> _VSQXTUN2(uint8_t RegisterSize,
                                uint8_t ElementSize,
                                OrderedNode *ssa0,
                                OrderedNode *ssa1) {
  return _VSQXTUN2(ssa0, ssa1, RegisterSize, ElementSize);
}
|
||||
/// Sizes-first overload: forwards to the `_VCastFromGPR` overload that takes
/// the SSA operand ahead of the register and element sizes.
IRPair<IROp_VCastFromGPR> _VCastFromGPR(uint8_t RegisterSize,
                                        uint8_t ElementSize,
                                        OrderedNode *ssa0) {
  return _VCastFromGPR(ssa0, RegisterSize, ElementSize);
}
|
||||
/// Sizes-first overload: forwards to the `_VExtractToGPR` overload that takes
/// the SSA operand first, followed by the register size, element size and
/// index.
IRPair<IROp_VExtractToGPR> _VExtractToGPR(uint8_t RegisterSize,
                                          uint8_t ElementSize,
                                          OrderedNode *ssa0,
                                          uint8_t Index) {
  return _VExtractToGPR(ssa0, RegisterSize, ElementSize, Index);
}
|
||||
/// Sizes-first overload: forwards to the `_VInsGPR` overload that takes the
/// two SSA operands first, followed by the register size, element size and
/// index.
IRPair<IROp_VInsGPR> _VInsGPR(uint8_t RegisterSize,
                              uint8_t ElementSize,
                              OrderedNode *ssa0,
                              OrderedNode *ssa1,
                              uint8_t Index) {
  return _VInsGPR(ssa0, ssa1, RegisterSize, ElementSize, Index);
}
|
||||
|
||||
IRPair<IROp_Jump> _Jump() {
|
||||
return _Jump(InvalidNode);
|
||||
|
Loading…
Reference in New Issue
Block a user