Adds new IR ops to interpreter and x86 JIT

This commit is contained in:
Ryan Houdek 2020-02-17 17:03:12 -08:00 committed by Stefanos Kornilios Mitsis Poiitidis
parent 3bad1eafbc
commit c336a09641
3 changed files with 344 additions and 0 deletions

View File

@ -2136,6 +2136,155 @@ void InterpreterCore::ExecuteCode(FEXCore::Core::InternalThreadState *Thread) {
memcpy(GDP, Tmp, Op->RegisterSize);
break;
}
// Element-wise helper for vector ops whose destination and source element types differ.
// Expands to a complete `case size: { ... break; }`, so it must appear inside a `switch`.
// The expansion site has to provide `Tmp` (destination buffer), `Src` (source pointer)
// and `Elements` (number of elements to process).
//   type  - destination element type
//   type2 - source element type
//   func  - callable invoked as func(source_element, max); its result is cast to `type`
#define DO_VECTOR_1SRC_2TYPE_OP(size, type, type2, func, max) \
  case size: { \
    auto *DstElems = reinterpret_cast<type*>(Tmp); \
    auto *SrcElems = reinterpret_cast<type2*>(Src); \
    for (uint8_t Idx = 0; Idx < Elements; ++Idx) { \
      DstElems[Idx] = static_cast<type>(func(SrcElems[Idx], max)); \
    } \
    break; \
  }
// "Top half" variant of the helper above. The expansion site has to provide `Tmp`,
// `Src1`, `Src2` and `Elements`. It first copies Elements * sizeof(type2) bytes from
// `Src1` into `Tmp`, then writes the converted elements of `Src2` into destination
// slots [Elements, 2 * Elements).
#define DO_VECTOR_1SRC_2TYPE_OP_TOP(size, type, type2, func, max) \
  case size: { \
    auto *DstElems = reinterpret_cast<type*>(Tmp); \
    auto *SrcElems = reinterpret_cast<type2*>(Src2); \
    memcpy(DstElems, Src1, Elements * sizeof(type2)); \
    for (uint8_t Idx = 0; Idx < Elements; ++Idx) { \
      DstElems[Elements + Idx] = static_cast<type>(func(SrcElems[Idx], max)); \
    } \
    break; \
  }
case IR::OP_VSQXTUN: {
  // Narrowing with unsigned saturation: every signed source element is clamped to
  // [0, max of the half-width unsigned type] and stored as that half-width type.
  auto Op = IROp->C<IR::IROp_VSQXTUN>();
  void *Src = GetSrc<void*>(Op->Header.Args[0]);
  // Zero-filled: the narrowed result only covers half of the register, and the whole
  // RegisterSize is copied out below. Without this the upper bytes were stack garbage.
  uint8_t Tmp[16]{};
  // ElementSize is the width of the (wide) source elements.
  uint8_t Elements = Op->RegisterSize / Op->ElementSize;
  auto Func = [](auto a, auto max) { return std::max(std::min(a, (decltype(a))max), (decltype(a))0); };
  switch (Op->ElementSize) {
    // Macro argument order is (size, destination type, source type, func, max).
    // The original passed the two types swapped, which read the source as the narrow
    // unsigned type and wrote the wide signed type, so nothing was actually narrowed.
    DO_VECTOR_1SRC_2TYPE_OP(2, uint8_t, int16_t, Func, (1 << 8) - 1)
    DO_VECTOR_1SRC_2TYPE_OP(4, uint16_t, int32_t, Func, (1 << 16) - 1)
    default: LogMan::Msg::A("Unknown Element Size: %d", Op->ElementSize); break;
  }
  memcpy(GDP, Tmp, Op->RegisterSize);
  break;
}
case IR::OP_VSQXTUN2: {
  // "Top half" narrowing with unsigned saturation: the lower half of the result comes
  // from Src1, the upper half holds the clamped, narrowed elements of Src2.
  auto Op = IROp->C<IR::IROp_VSQXTUN2>();
  void *Src1 = GetSrc<void*>(Op->Header.Args[0]);
  void *Src2 = GetSrc<void*>(Op->Header.Args[1]);
  uint8_t Tmp[16]{};
  // ElementSize is the width of the (wide) source elements.
  uint8_t Elements = Op->RegisterSize / Op->ElementSize;
  auto Func = [](auto a, auto max) { return std::max(std::min(a, (decltype(a))max), (decltype(a))0); };
  switch (Op->ElementSize) {
    // Macro argument order is (size, destination type, source type, func, max).
    // With the types swapped (as originally written) the destination was indexed as
    // the wide type, so Dst[i + Elements] landed past the end of the 16-byte Tmp.
    // With narrow destination elements, the macro's memcpy copies RegisterSize bytes of
    // Src1 and the loop then overwrites exactly the upper half.
    DO_VECTOR_1SRC_2TYPE_OP_TOP(2, uint8_t, int16_t, Func, (1 << 8) - 1)
    DO_VECTOR_1SRC_2TYPE_OP_TOP(4, uint16_t, int32_t, Func, (1 << 16) - 1)
    default: LogMan::Msg::A("Unknown Element Size: %d", Op->ElementSize); break;
  }
  memcpy(GDP, Tmp, Op->RegisterSize);
  break;
}
case IR::OP_VUCVTF: {
  // Per-element unsigned integer -> floating point conversion at the same element width.
  auto ConvertOp = IROp->C<IR::IROp_VUCVTF>();
  void *Src = GetSrc<void*>(ConvertOp->Header.Args[0]);
  uint8_t Tmp[16];
  uint8_t Elements = ConvertOp->RegisterSize / ConvertOp->ElementSize;
  // The value is passed through untouched; the macro's cast to the destination type
  // performs the actual conversion. The second argument is unused.
  auto PassThrough = [](auto Value, auto /*Unused*/) { return Value; };
  switch (ConvertOp->ElementSize) {
    DO_VECTOR_1SRC_2TYPE_OP(4, float, uint32_t, PassThrough, 0)
    DO_VECTOR_1SRC_2TYPE_OP(8, double, uint64_t, PassThrough, 0)
    default:
      LogMan::Msg::A("Unknown Element Size: %d", ConvertOp->ElementSize);
      break;
  }
  memcpy(GDP, Tmp, ConvertOp->RegisterSize);
  break;
}
case IR::OP_VSCVTF: {
  // Per-element signed integer -> floating point conversion at the same element width.
  auto ConvertOp = IROp->C<IR::IROp_VSCVTF>();
  void *Src = GetSrc<void*>(ConvertOp->Header.Args[0]);
  uint8_t Tmp[16];
  uint8_t Elements = ConvertOp->RegisterSize / ConvertOp->ElementSize;
  // The value is passed through untouched; the macro's cast to the destination type
  // performs the actual conversion. The second argument is unused.
  auto PassThrough = [](auto Value, auto /*Unused*/) { return Value; };
  switch (ConvertOp->ElementSize) {
    DO_VECTOR_1SRC_2TYPE_OP(4, float, int32_t, PassThrough, 0)
    DO_VECTOR_1SRC_2TYPE_OP(8, double, int64_t, PassThrough, 0)
    default:
      LogMan::Msg::A("Unknown Element Size: %d", ConvertOp->ElementSize);
      break;
  }
  memcpy(GDP, Tmp, ConvertOp->RegisterSize);
  break;
}
case IR::OP_VFCVTL: {
  // Floating point widening: converts the low float elements of the source to doubles.
  auto Op = IROp->C<IR::IROp_VFCVTL>();
  void *Src = GetSrc<void*>(Op->Header.Args[0]);
  uint8_t Tmp[16];
  // ElementSize is the source element width. The destination elements are twice as
  // wide, so only half as many fit in the register. Using RegisterSize / ElementSize
  // here made the loop write 2 * RegisterSize bytes into the 16-byte Tmp buffer.
  uint8_t Elements = Op->RegisterSize / (Op->ElementSize * 2);
  auto Func = [](auto a, auto max) { return a; };
  switch (Op->ElementSize) {
    DO_VECTOR_1SRC_2TYPE_OP(4, double, float, Func, 0)
    default: LogMan::Msg::A("Unknown Element Size: %d", Op->ElementSize); break;
  }
  memcpy(GDP, Tmp, Op->RegisterSize);
  break;
}
case IR::OP_VFCVTN: {
  // Floating point narrowing: converts each double source element to a float.
  auto Op = IROp->C<IR::IROp_VFCVTN>();
  void *Src = GetSrc<void*>(Op->Header.Args[0]);
  // Zero-filled: the narrowed floats only occupy the lower half of the register, but
  // the whole RegisterSize is copied out below. Without this the upper half of the
  // result was uninitialized stack data.
  uint8_t Tmp[16]{};
  // ElementSize is the width of the (wide) source elements.
  uint8_t Elements = Op->RegisterSize / Op->ElementSize;
  auto Func = [](auto a, auto max) { return a; };
  switch (Op->ElementSize) {
    DO_VECTOR_1SRC_2TYPE_OP(8, float, double, Func, 0)
    default: LogMan::Msg::A("Unknown Element Size: %d", Op->ElementSize); break;
  }
  memcpy(GDP, Tmp, Op->RegisterSize);
  break;
}
case IR::OP_VSXTL: {
  // Sign-extending widen: each low source element becomes an element of twice the width.
  auto Op = IROp->C<IR::IROp_VSXTL>();
  void *Src = GetSrc<void*>(Op->Header.Args[0]);
  uint8_t Tmp[16];
  // ElementSize is the source element width. The destination elements are twice as
  // wide, so only half as many fit in the register. Using RegisterSize / ElementSize
  // here made the loop write 2 * RegisterSize bytes into the 16-byte Tmp buffer.
  uint8_t Elements = Op->RegisterSize / (Op->ElementSize * 2);
  auto Func = [](auto a, auto max) { return a; };
  switch (Op->ElementSize) {
    DO_VECTOR_1SRC_2TYPE_OP(1, int16_t, int8_t, Func, 0)
    DO_VECTOR_1SRC_2TYPE_OP(2, int32_t, int16_t, Func, 0)
    DO_VECTOR_1SRC_2TYPE_OP(4, int64_t, int32_t, Func, 0)
    default: LogMan::Msg::A("Unknown Element Size: %d", Op->ElementSize); break;
  }
  memcpy(GDP, Tmp, Op->RegisterSize);
  break;
}
case IR::OP_VUXTL: {
  // Zero-extending widen: each low source element becomes an element of twice the width.
  auto Op = IROp->C<IR::IROp_VUXTL>();
  void *Src = GetSrc<void*>(Op->Header.Args[0]);
  uint8_t Tmp[16];
  // ElementSize is the source element width. The destination elements are twice as
  // wide, so only half as many fit in the register. Using RegisterSize / ElementSize
  // here made the loop write 2 * RegisterSize bytes into the 16-byte Tmp buffer.
  uint8_t Elements = Op->RegisterSize / (Op->ElementSize * 2);
  auto Func = [](auto a, auto max) { return a; };
  switch (Op->ElementSize) {
    DO_VECTOR_1SRC_2TYPE_OP(1, uint16_t, uint8_t, Func, 0)
    DO_VECTOR_1SRC_2TYPE_OP(2, uint32_t, uint16_t, Func, 0)
    DO_VECTOR_1SRC_2TYPE_OP(4, uint64_t, uint32_t, Func, 0)
    default: LogMan::Msg::A("Unknown Element Size: %d", Op->ElementSize); break;
  }
  memcpy(GDP, Tmp, Op->RegisterSize);
  break;
}
case IR::OP_VUMIN: {
auto Op = IROp->C<IR::IROp_VUMin>();
void *Src1 = GetSrc<void*>(Op->Header.Args[0]);
@ -2561,6 +2710,22 @@ void InterpreterCore::ExecuteCode(FEXCore::Core::InternalThreadState *Thread) {
memcpy(GDP, &Dst, Op->RegisterSize);
break;
}
case IR::OP_VINSGPR: {
  // Inserts the low ElementSize bytes of Args[1] into element `Index` of the vector in
  // Args[0]; all other elements are passed through unchanged.
  auto Op = IROp->C<IR::IROp_VInsGPR>();
  __uint128_t Src1 = *GetSrc<__uint128_t*>(Op->Header.Args[0]);
  __uint128_t Src2 = *GetSrc<__uint128_t*>(Op->Header.Args[1]);
  const uint64_t ElementBits = Op->ElementSize * 8;
  // Index counts elements (the x86 JIT passes it straight to pinsr*), so the bit
  // offset has to be scaled by the element width, not by a fixed 8 bits.
  const uint64_t Offset = Op->Index * ElementBits;
  // Build the mask in 128 bits: `1ULL << 64` is undefined for 8-byte elements.
  const __uint128_t ElementMask = (static_cast<__uint128_t>(1) << ElementBits) - 1;
  // Drop everything above the inserted element so stale upper bits of the source
  // cannot leak into neighbouring elements.
  Src2 &= ElementMask;
  __uint128_t Dst = Src1 & ~(ElementMask << Offset);
  Dst |= Src2 << Offset;
  memcpy(GDP, &Dst, Op->RegisterSize);
  break;
}
case IR::OP_SCVTF: {
auto Op = IROp->C<IR::IROp_SCVTF>();
if (Op->ElementSize == 8) {

View File

@ -1594,7 +1594,32 @@ void *JITCore::CompileCode([[maybe_unused]] FEXCore::IR::IRListView<true> const
break;
}
case IR::OP_VINSGPR: {
  // Inserts a general purpose register value into element `Index` of a vector.
  // Args[0] is the vector being modified, Args[1] is the GPR value to insert.
  auto Op = IROp->C<IR::IROp_VInsGPR>();
  // Start from a copy of the source vector so untouched elements are preserved.
  movapd(GetDst(Node), GetSrc(Op->Header.Args[0].ID()));
  // The inserted value must come from Args[1]; the original read Args[0] (the vector
  // operand) as the GPR source for every element size.
  switch (Op->ElementSize) {
    case 1: {
      pinsrb(GetDst(Node), GetSrc<RA_8>(Op->Header.Args[1].ID()), Op->Index);
      break;
    }
    case 2: {
      pinsrw(GetDst(Node), GetSrc<RA_16>(Op->Header.Args[1].ID()), Op->Index);
      break;
    }
    case 4: {
      pinsrd(GetDst(Node), GetSrc<RA_32>(Op->Header.Args[1].ID()), Op->Index);
      break;
    }
    case 8: {
      pinsrq(GetDst(Node), GetSrc<RA_64>(Op->Header.Args[1].ID()), Op->Index);
      break;
    }
    default: LogMan::Msg::A("Unknown Element Size: %d", Op->ElementSize); break;
  }
  break;
}
case IR::OP_PRINT: {
auto Op = IROp->C<IR::IROp_Print>();
@ -1957,6 +1982,71 @@ void *JITCore::CompileCode([[maybe_unused]] FEXCore::IR::IRListView<true> const
}
break;
}
case IR::OP_VFMAX: {
  // Floating point maximum. When the element fills the whole register the scalar
  // instruction is emitted, otherwise the packed one.
  auto MaxOp = IROp->C<IR::IROp_VFMax>();
  const bool IsScalar = MaxOp->ElementSize == MaxOp->RegisterSize;
  switch (MaxOp->ElementSize) {
    case 4:
      if (IsScalar) {
        vmaxss(GetDst(Node), GetSrc(MaxOp->Header.Args[0].ID()), GetSrc(MaxOp->Header.Args[1].ID()));
      }
      else {
        vmaxps(GetDst(Node), GetSrc(MaxOp->Header.Args[0].ID()), GetSrc(MaxOp->Header.Args[1].ID()));
      }
      break;
    case 8:
      if (IsScalar) {
        vmaxsd(GetDst(Node), GetSrc(MaxOp->Header.Args[0].ID()), GetSrc(MaxOp->Header.Args[1].ID()));
      }
      else {
        vmaxpd(GetDst(Node), GetSrc(MaxOp->Header.Args[0].ID()), GetSrc(MaxOp->Header.Args[1].ID()));
      }
      break;
    default:
      LogMan::Msg::A("Unknown Element Size: %d", MaxOp->ElementSize);
      break;
  }
  break;
}
case IR::OP_VFMIN: {
  // Floating point minimum. When the element fills the whole register the scalar
  // instruction is emitted, otherwise the packed one.
  auto MinOp = IROp->C<IR::IROp_VFMin>();
  const bool IsScalar = MinOp->ElementSize == MinOp->RegisterSize;
  switch (MinOp->ElementSize) {
    case 4:
      if (IsScalar) {
        vminss(GetDst(Node), GetSrc(MinOp->Header.Args[0].ID()), GetSrc(MinOp->Header.Args[1].ID()));
      }
      else {
        vminps(GetDst(Node), GetSrc(MinOp->Header.Args[0].ID()), GetSrc(MinOp->Header.Args[1].ID()));
      }
      break;
    case 8:
      if (IsScalar) {
        vminsd(GetDst(Node), GetSrc(MinOp->Header.Args[0].ID()), GetSrc(MinOp->Header.Args[1].ID()));
      }
      else {
        vminpd(GetDst(Node), GetSrc(MinOp->Header.Args[0].ID()), GetSrc(MinOp->Header.Args[1].ID()));
      }
      break;
    default:
      LogMan::Msg::A("Unknown Element Size: %d", MinOp->ElementSize);
      break;
  }
  break;
}
case IR::OP_VFRECP: {
auto Op = IROp->C<IR::IROp_VFRecp>();
if (Op->ElementSize == Op->RegisterSize) {
@ -2037,6 +2127,80 @@ void *JITCore::CompileCode([[maybe_unused]] FEXCore::IR::IRListView<true> const
}
break;
}
case IR::OP_VSXTL: {
  // Sign-extending widen, selected by the source element size:
  // byte -> word, word -> dword, dword -> qword.
  auto ExtendOp = IROp->C<IR::IROp_VSXTL>();
  const auto SrcElementSize = ExtendOp->ElementSize;
  if (SrcElementSize == 1) {
    pmovsxbw(GetDst(Node), GetSrc(ExtendOp->Header.Args[0].ID()));
  }
  else if (SrcElementSize == 2) {
    pmovsxwd(GetDst(Node), GetSrc(ExtendOp->Header.Args[0].ID()));
  }
  else if (SrcElementSize == 4) {
    pmovsxdq(GetDst(Node), GetSrc(ExtendOp->Header.Args[0].ID()));
  }
  else {
    LogMan::Msg::A("Unknown element size: %d", SrcElementSize);
  }
  break;
}
case IR::OP_VUXTL: {
  // Zero-extending widen, selected by the source element size:
  // byte -> word, word -> dword, dword -> qword.
  auto ExtendOp = IROp->C<IR::IROp_VUXTL>();
  const auto SrcElementSize = ExtendOp->ElementSize;
  if (SrcElementSize == 1) {
    pmovzxbw(GetDst(Node), GetSrc(ExtendOp->Header.Args[0].ID()));
  }
  else if (SrcElementSize == 2) {
    pmovzxwd(GetDst(Node), GetSrc(ExtendOp->Header.Args[0].ID()));
  }
  else if (SrcElementSize == 4) {
    pmovzxdq(GetDst(Node), GetSrc(ExtendOp->Header.Args[0].ID()));
  }
  else {
    LogMan::Msg::A("Unknown element size: %d", SrcElementSize);
  }
  break;
}
case IR::OP_VSCVTF: {
  // Per-element signed integer -> floating point conversion at the same element width.
  auto Op = IROp->C<IR::IROp_VSCVTF>();
  switch (Op->ElementSize) {
    case 4:
      cvtdq2ps(GetDst(Node), GetSrc(Op->Header.Args[0].ID()));
      break;
    case 8:
      // This operation is a bit disgusting in x86
      // There is no vector form of this instruction until AVX512VL + AVX512DQ (vcvtqq2pd)
      // 1) First extract both 64-bit halves (before the destination is written, in
      //    case it shares a register with the source)
      // 2) Do a scalar conversion on each
      // 3) Make sure to merge them together at the end
      pextrq(rax, GetSrc(Op->Header.Args[0].ID()), 1);
      pextrq(rcx, GetSrc(Op->Header.Args[0].ID()), 0);
      // 64-bit integers convert to doubles here, so the double-precision scalar
      // conversion is required; cvtsi2ss would produce 32-bit floats instead.
      cvtsi2sd(GetDst(Node), rcx);
      cvtsi2sd(xmm15, rax);
      // Low 64 bits of xmm15 (converted upper element) -> high 64 bits of the result.
      movlhps(GetDst(Node), xmm15);
      break;
    default: LogMan::Msg::A("Unknown Element Size: %d", Op->ElementSize); break;
  }
  break;
}
case IR::OP_VFCVTL: {
  // Floating point widening; only float -> double (source element size 4) is handled.
  auto WidenOp = IROp->C<IR::IROp_VFCVTL>();
  if (WidenOp->ElementSize == 4) {
    cvtps2pd(GetDst(Node), GetSrc(WidenOp->Header.Args[0].ID()));
  }
  else {
    LogMan::Msg::A("Unknown Element Size: %d", WidenOp->ElementSize);
  }
  break;
}
case IR::OP_VFCVTN: {
  // Floating point narrowing; only double -> float (source element size 8) is handled.
  auto NarrowOp = IROp->C<IR::IROp_VFCVTN>();
  if (NarrowOp->ElementSize == 8) {
    cvtpd2ps(GetDst(Node), GetSrc(NarrowOp->Header.Args[0].ID()));
  }
  else {
    LogMan::Msg::A("Unknown Element Size: %d", NarrowOp->ElementSize);
  }
  break;
}
case IR::OP_VBITCAST: {
auto Op = IROp->C<IR::IROp_VBitcast>();
movaps(GetDst(Node), GetSrc(Op->Header.Args[0].ID()));

View File

@ -375,12 +375,27 @@ public:
// Convenience overload taking RegisterSize and ElementSize ahead of the SSA operand;
// forwards to the _VNot overload that takes the operand first.
IRPair<IROp_VNot> _VNot(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0) {
return _VNot(ssa0, RegisterSize, ElementSize);
}
// Convenience overload taking RegisterSize and ElementSize ahead of the SSA operand;
// forwards to the _VSQXTN overload that takes the operand first.
IRPair<IROp_VSQXTN> _VSQXTN(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0) {
return _VSQXTN(ssa0, RegisterSize, ElementSize);
}
// Convenience overload taking RegisterSize and ElementSize ahead of the two SSA
// operands; forwards to the _VSQXTN2 overload that takes the operands first.
IRPair<IROp_VSQXTN2> _VSQXTN2(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) {
return _VSQXTN2(ssa0, ssa1, RegisterSize, ElementSize);
}
// Convenience overload taking RegisterSize and ElementSize ahead of the SSA operand;
// forwards to the _VSQXTUN overload that takes the operand first.
IRPair<IROp_VSQXTUN> _VSQXTUN(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0) {
return _VSQXTUN(ssa0, RegisterSize, ElementSize);
}
// Convenience overload taking RegisterSize and ElementSize ahead of the two SSA
// operands; forwards to the _VSQXTUN2 overload that takes the operands first.
IRPair<IROp_VSQXTUN2> _VSQXTUN2(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) {
return _VSQXTUN2(ssa0, ssa1, RegisterSize, ElementSize);
}
// Convenience overload taking RegisterSize and ElementSize ahead of the SSA operand;
// forwards to the _VCastFromGPR overload that takes the operand first.
IRPair<IROp_VCastFromGPR> _VCastFromGPR(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0) {
return _VCastFromGPR(ssa0, RegisterSize, ElementSize);
}
// Convenience overload taking RegisterSize and ElementSize ahead of the SSA operand;
// forwards to the _VExtractToGPR overload that takes the operand first. Index is
// passed through unchanged as the last argument.
IRPair<IROp_VExtractToGPR> _VExtractToGPR(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, uint8_t Index) {
return _VExtractToGPR(ssa0, RegisterSize, ElementSize, Index);
}
// Convenience overload taking RegisterSize and ElementSize ahead of the two SSA
// operands; forwards to the _VInsGPR overload that takes the operands first. Index is
// passed through unchanged as the last argument.
IRPair<IROp_VInsGPR> _VInsGPR(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1, uint8_t Index) {
return _VInsGPR(ssa0, ssa1, RegisterSize, ElementSize, Index);
}
IRPair<IROp_Jump> _Jump() {
return _Jump(InvalidNode);