mirror of
https://github.com/FEX-Emu/FEX.git
synced 2024-12-13 17:15:41 +00:00
Various fixes that unit tests have found
- Need to distinguish between unsigned and signed saturating ops
- Fixes vector splat
- Fixes VUMULL/VSMULL num element calculation
- Fixes undefined behaviour in a couple of vector shift ops
- Implements VSSHR
- Fixes VExtr to calculate its offset correctly
- Fixes VInsGPR to calculate its offset correctly
- Fixes PSRA (not immediate) x86 ops
- Fixes scalar vector unary ops to correctly insert lower bits
- Fixes conversion instructions to correctly insert lower bits
- Fixes PINSRW using the wrong element size
This commit is contained in:
parent
9b21caa647
commit
9c363fb317
@ -12,6 +12,7 @@
|
||||
|
||||
#include <atomic>
|
||||
#include <cmath>
|
||||
#include <limits>
|
||||
#include <vector>
|
||||
|
||||
namespace FEXCore::CPU {
|
||||
@ -1855,10 +1856,11 @@ void InterpreterCore::ExecuteCode(FEXCore::Core::InternalThreadState *Thread) {
|
||||
auto *Dst_d = reinterpret_cast<type*>(Tmp); \
|
||||
auto *Src_d = reinterpret_cast<type*>(Src); \
|
||||
for (uint8_t i = 0; i < Elements; ++i) \
|
||||
Dst_d[i] = *Src_d;\
|
||||
Dst_d[i] = *Src_d;\
|
||||
break; \
|
||||
}
|
||||
switch (Op->Header.Size) {
|
||||
uint8_t ElementSize = OpSize / Elements;
|
||||
switch (ElementSize) {
|
||||
CREATE_VECTOR(1, uint8_t)
|
||||
CREATE_VECTOR(2, uint16_t)
|
||||
CREATE_VECTOR(4, uint32_t)
|
||||
@ -1952,6 +1954,17 @@ void InterpreterCore::ExecuteCode(FEXCore::Core::InternalThreadState *Thread) {
|
||||
} \
|
||||
break; \
|
||||
}
|
||||
// Emits one `case size:` arm of an element-size switch for a saturating
// two-source vector operation.
//
// The expansion site must provide `Tmp` (destination scratch buffer), `Src1`
// and `Src2` (source pointers) and `Elements` (lane count). Each lane of the
// two sources is viewed as `type` and combined with
// `func(lhs, rhs, min, max)`; the result is stored to the same lane of `Tmp`.
#define DO_VECTOR_SAT_OP(size, type, func, min, max)            \
  case size: {                                                  \
    auto *Out = reinterpret_cast<type*>(Tmp);                   \
    auto *Lhs = reinterpret_cast<type*>(Src1);                  \
    auto *Rhs = reinterpret_cast<type*>(Src2);                  \
    for (uint8_t Lane = 0; Lane < Elements; ++Lane) {           \
      Out[Lane] = func(Lhs[Lane], Rhs[Lane], min, max);         \
    }                                                           \
    break;                                                      \
  }
|
||||
|
||||
case IR::OP_VUSHRI: {
|
||||
auto Op = IROp->C<IR::IROp_VUShrI>();
|
||||
void *Src = GetSrc<void*>(Op->Header.Args[0]);
|
||||
@ -2048,6 +2061,82 @@ void InterpreterCore::ExecuteCode(FEXCore::Core::InternalThreadState *Thread) {
|
||||
memcpy(GDP, Tmp, Op->RegisterSize);
|
||||
break;
|
||||
}
|
||||
case IR::OP_VUQADD: {
  // Unsigned saturating vector add: each lane of the result is
  // Src1[i] + Src2[i], clamped to the largest value the lane type can hold.
  auto Op = IROp->C<IR::IROp_VUQAdd>();
  void *Src1 = GetSrc<void*>(Op->Header.Args[0]);
  void *Src2 = GetSrc<void*>(Op->Header.Args[1]);
  uint8_t Tmp[16];

  uint8_t Elements = Op->RegisterSize / Op->ElementSize;

  auto Func = [](auto a, auto b) {
    using T = decltype(a);
    // Sum wrapped to the lane width; a wrapped sum is smaller than either
    // operand, which is how the overflow is detected.
    const T res = static_cast<T>(a + b);
    // Saturate to the lane type's own maximum. The previous `~0U` constant is
    // only 32 bits wide, so 64-bit lanes saturated to 0x00000000FFFFFFFF.
    return res < a ? std::numeric_limits<T>::max() : res;
  };

  switch (Op->ElementSize) {
    DO_VECTOR_OP(1, uint8_t,  Func)
    DO_VECTOR_OP(2, uint16_t, Func)
    DO_VECTOR_OP(4, uint32_t, Func)
    DO_VECTOR_OP(8, uint64_t, Func)
    default: LogMan::Msg::A("Unknown Element Size: %d", Op->ElementSize); break;
  }
  memcpy(GDP, Tmp, Op->RegisterSize);
  break;
}
|
||||
case IR::OP_VUQSUB: {
  // Unsigned saturating vector subtract: each lane of the result is
  // Src1[i] - Src2[i], clamped at zero instead of wrapping around.
  auto Op = IROp->C<IR::IROp_VUQSub>();
  uint8_t Tmp[16];
  void *Src1 = GetSrc<void*>(Op->Header.Args[0]);
  void *Src2 = GetSrc<void*>(Op->Header.Args[1]);

  uint8_t Elements = Op->RegisterSize / Op->ElementSize;

  auto SaturatingSub = [](auto lhs, auto rhs) {
    using T = decltype(lhs);
    // Difference wrapped to the lane width; a wrapped difference is larger
    // than the minuend, which signals the underflow.
    const T Diff = static_cast<T>(lhs - rhs);
    if (Diff > lhs) {
      return T{0};
    }
    return Diff;
  };

  switch (Op->ElementSize) {
    DO_VECTOR_OP(1, uint8_t,  SaturatingSub)
    DO_VECTOR_OP(2, uint16_t, SaturatingSub)
    DO_VECTOR_OP(4, uint32_t, SaturatingSub)
    DO_VECTOR_OP(8, uint64_t, SaturatingSub)
    default: LogMan::Msg::A("Unknown Element Size: %d", Op->ElementSize); break;
  }
  memcpy(GDP, Tmp, Op->RegisterSize);
  break;
}
|
||||
case IR::OP_VSQADD: {
  // Signed saturating vector add: each lane of the result is
  // Src1[i] + Src2[i], clamped to the [min, max] range of the lane type.
  auto Op = IROp->C<IR::IROp_VSQAdd>();
  void *Src1 = GetSrc<void*>(Op->Header.Args[0]);
  void *Src2 = GetSrc<void*>(Op->Header.Args[1]);
  uint8_t Tmp[16];

  uint8_t Elements = Op->RegisterSize / Op->ElementSize;

  auto Func = [](auto a, auto b) {
    using T = decltype(a);
    constexpr T Max = std::numeric_limits<T>::max();
    constexpr T Min = std::numeric_limits<T>::min();

    // Range checks come first so the addition below is only evaluated when it
    // cannot overflow; signed overflow is undefined behaviour for the 32-bit
    // and 64-bit lane types.
    if (a > 0) {
      // Max - a cannot overflow because a is positive.
      if (b > (Max - a)) {
        return Max;
      }
    }
    // Min - a cannot overflow because a is zero or negative here.
    else if (b < (Min - a)) {
      return Min;
    }

    return static_cast<T>(a + b);
  };

  switch (Op->ElementSize) {
    DO_VECTOR_OP(1, int8_t,  Func)
    DO_VECTOR_OP(2, int16_t, Func)
    DO_VECTOR_OP(4, int32_t, Func)
    DO_VECTOR_OP(8, int64_t, Func)
    default: LogMan::Msg::A("Unknown Element Size: %d", Op->ElementSize); break;
  }
  memcpy(GDP, Tmp, Op->RegisterSize);
  break;
}
|
||||
case IR::OP_VFADD: {
|
||||
auto Op = IROp->C<IR::IROp_VFAdd>();
|
||||
void *Src1 = GetSrc<void*>(Op->Header.Args[0]);
|
||||
@ -2300,7 +2389,7 @@ void InterpreterCore::ExecuteCode(FEXCore::Core::InternalThreadState *Thread) {
|
||||
|
||||
uint8_t Tmp[16];
|
||||
|
||||
uint8_t Elements = Op->RegisterSize / Op->ElementSize;
|
||||
uint8_t Elements = Op->RegisterSize / (Op->ElementSize << 1);
|
||||
|
||||
auto Func = [](auto a, auto b) { return a * b; };
|
||||
switch (Op->ElementSize) {
|
||||
@ -2319,7 +2408,7 @@ void InterpreterCore::ExecuteCode(FEXCore::Core::InternalThreadState *Thread) {
|
||||
|
||||
uint8_t Tmp[16];
|
||||
|
||||
uint8_t Elements = Op->RegisterSize / Op->ElementSize;
|
||||
uint8_t Elements = Op->RegisterSize / (Op->ElementSize << 1);
|
||||
|
||||
auto Func = [](auto a, auto b) { return a * b; };
|
||||
switch (Op->ElementSize) {
|
||||
@ -2478,7 +2567,7 @@ void InterpreterCore::ExecuteCode(FEXCore::Core::InternalThreadState *Thread) {
|
||||
uint8_t Tmp[16];
|
||||
|
||||
uint8_t Elements = Op->RegisterSize / Op->ElementSize;
|
||||
auto Func = [](auto a, auto b) { return a << b; };
|
||||
auto Func = [](auto a, auto b) { return b >= (sizeof(a) * 8) ? 0 : a << b; };
|
||||
|
||||
switch (Op->ElementSize) {
|
||||
DO_VECTOR_OP(1, uint8_t, Func)
|
||||
@ -2490,6 +2579,26 @@ void InterpreterCore::ExecuteCode(FEXCore::Core::InternalThreadState *Thread) {
|
||||
memcpy(GDP, Tmp, Op->RegisterSize);
|
||||
break;
|
||||
}
|
||||
case IR::OP_VSSHR: {
  // Signed (arithmetic) per-lane right shift: lane i of Src1 is shifted by
  // lane i of Src2.
  auto Op = IROp->C<IR::IROp_VSShr>();
  uint8_t Tmp[16];
  void *Src1 = GetSrc<void*>(Op->Header.Args[0]);
  void *Src2 = GetSrc<void*>(Op->Header.Args[1]);

  uint8_t Elements = Op->RegisterSize / Op->ElementSize;

  auto ArithmeticShiftRight = [](auto value, auto amount) {
    constexpr auto BitWidth = sizeof(value) * 8;
    // A shift count of the lane width or more is replaced by a shift of
    // (width - 1), which leaves only copies of the sign bit.
    if (amount >= BitWidth) {
      return value >> (BitWidth - 1);
    }
    return value >> amount;
  };

  switch (Op->ElementSize) {
    DO_VECTOR_OP(1, int8_t,  ArithmeticShiftRight)
    DO_VECTOR_OP(2, int16_t, ArithmeticShiftRight)
    DO_VECTOR_OP(4, int32_t, ArithmeticShiftRight)
    DO_VECTOR_OP(8, int64_t, ArithmeticShiftRight)
    default: LogMan::Msg::A("Unknown Element Size: %d", Op->ElementSize); break;
  }
  memcpy(GDP, Tmp, Op->RegisterSize);
  break;
}
|
||||
|
||||
case IR::OP_VUSHLS: {
|
||||
auto Op = IROp->C<IR::IROp_VUShlS>();
|
||||
void *Src1 = GetSrc<void*>(Op->Header.Args[0]);
|
||||
@ -2497,7 +2606,7 @@ void InterpreterCore::ExecuteCode(FEXCore::Core::InternalThreadState *Thread) {
|
||||
uint8_t Tmp[16];
|
||||
|
||||
uint8_t Elements = Op->RegisterSize / Op->ElementSize;
|
||||
auto Func = [](auto a, auto b) { return a << b; };
|
||||
auto Func = [](auto a, auto b) { return b >= (sizeof(a) * 8) ? 0 : a << b; };
|
||||
|
||||
switch (Op->ElementSize) {
|
||||
DO_VECTOR_SCALAR_OP(1, uint8_t, Func)
|
||||
@ -2517,7 +2626,7 @@ void InterpreterCore::ExecuteCode(FEXCore::Core::InternalThreadState *Thread) {
|
||||
uint8_t Tmp[16];
|
||||
|
||||
uint8_t Elements = Op->RegisterSize / Op->ElementSize;
|
||||
auto Func = [](auto a, auto b) { return a >> b; };
|
||||
auto Func = [](auto a, auto b) { return b >= (sizeof(a) * 8) ? 0 : a >> b; };
|
||||
|
||||
switch (Op->ElementSize) {
|
||||
DO_VECTOR_SCALAR_OP(1, uint8_t, Func)
|
||||
@ -2537,7 +2646,7 @@ void InterpreterCore::ExecuteCode(FEXCore::Core::InternalThreadState *Thread) {
|
||||
uint8_t Tmp[16];
|
||||
|
||||
uint8_t Elements = Op->RegisterSize / Op->ElementSize;
|
||||
auto Func = [](auto a, auto b) { return a >> b; };
|
||||
auto Func = [](auto a, auto b) { return b >= (sizeof(a) * 8) ? (a >> (sizeof(a) * 8 - 1)) : a >> b; };
|
||||
|
||||
switch (Op->ElementSize) {
|
||||
DO_VECTOR_SCALAR_OP(1, int8_t, Func)
|
||||
@ -2854,7 +2963,7 @@ void InterpreterCore::ExecuteCode(FEXCore::Core::InternalThreadState *Thread) {
|
||||
__uint128_t Src1 = *GetSrc<__uint128_t*>(Op->Header.Args[0]);
|
||||
__uint128_t Src2 = *GetSrc<__uint128_t*>(Op->Header.Args[1]);
|
||||
|
||||
uint64_t Offset = Op->Index * 8;
|
||||
uint64_t Offset = Op->Index * Op->ElementSize * 8;
|
||||
__uint128_t Dst = (Src1 << (sizeof(__uint128_t) * 8 - Offset)) | (Src2 >> Offset);
|
||||
|
||||
memcpy(GDP, &Dst, Op->RegisterSize);
|
||||
@ -2865,7 +2974,7 @@ void InterpreterCore::ExecuteCode(FEXCore::Core::InternalThreadState *Thread) {
|
||||
__uint128_t Src1 = *GetSrc<__uint128_t*>(Op->Header.Args[0]);
|
||||
__uint128_t Src2 = *GetSrc<__uint128_t*>(Op->Header.Args[1]);
|
||||
|
||||
uint64_t Offset = Op->Index * 8;
|
||||
uint64_t Offset = Op->Index * Op->ElementSize * 8;
|
||||
__uint128_t Mask = (1ULL << (Op->ElementSize * 8)) - 1;
|
||||
Mask <<= Offset;
|
||||
Mask = ~Mask;
|
||||
|
@ -2365,22 +2365,7 @@ template<FEXCore::IR::IROps IROp, size_t ElementSize>
|
||||
void OpDispatchBuilder::VectorScalarALUOp(OpcodeArgs) {
|
||||
auto Size = GetSrcSize(Op);
|
||||
OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags, -1);
|
||||
OrderedNode *Src{};
|
||||
if (Op->Src[0].TypeNone.Type == FEXCore::X86Tables::DecodedOperand::TYPE_GPR) {
|
||||
Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, -1);
|
||||
}
|
||||
else {
|
||||
// Load a elementsize from memory and move it to a vector register
|
||||
Src = LoadSource_WithOpSize(GPRClass, Op, Op->Src[0], ElementSize, Op->Flags, -1);
|
||||
switch (ElementSize) {
|
||||
case 4:
|
||||
Src = _SplatVector4(Src);
|
||||
break;
|
||||
case 8:
|
||||
Src = _SplatVector2(Src);
|
||||
break;
|
||||
}
|
||||
}
|
||||
OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, -1);
|
||||
|
||||
// If OpSize == ElementSize then it only does the lower scalar op
|
||||
auto ALUOp = _VAdd(ElementSize, ElementSize, Dest, Src);
|
||||
@ -2406,10 +2391,14 @@ void OpDispatchBuilder::VectorUnaryOp(OpcodeArgs) {
|
||||
// Overwrite our IR's op type
|
||||
ALUOp.first->Header.Op = IROp;
|
||||
|
||||
// Insert the lower bits
|
||||
auto Result = _VInsElement(Size, ElementSize, 0, 0, Dest, ALUOp);
|
||||
|
||||
StoreResult(FPRClass, Op, Result, -1);
|
||||
if (Scalar) {
|
||||
// Insert the lower bits
|
||||
auto Result = _VInsElement(GetSrcSize(Op), ElementSize, 0, 0, Dest, ALUOp);
|
||||
StoreResult(FPRClass, Op, Result, -1);
|
||||
}
|
||||
else {
|
||||
StoreResult(FPRClass, Op, ALUOp, -1);
|
||||
}
|
||||
}
|
||||
|
||||
void OpDispatchBuilder::MOVQOp(OpcodeArgs) {
|
||||
@ -2549,10 +2538,10 @@ void OpDispatchBuilder::ANDNOp(OpcodeArgs) {
|
||||
|
||||
template<size_t ElementSize>
|
||||
void OpDispatchBuilder::PINSROp(OpcodeArgs) {
|
||||
auto Size = GetSrcSize(Op);
|
||||
auto Size = GetDstSize(Op);
|
||||
|
||||
OrderedNode *Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, -1);
|
||||
OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags, -1);
|
||||
OrderedNode *Dest = LoadSource_WithOpSize(FPRClass, Op, Op->Dest, GetDstSize(Op), Op->Flags, -1);
|
||||
LogMan::Throw::A(Op->Src[1].TypeNone.Type == FEXCore::X86Tables::DecodedOperand::TYPE_LITERAL, "Src1 needs to be literal here");
|
||||
uint64_t Index = Op->Src[1].TypeLiteral.Literal;
|
||||
|
||||
@ -3806,6 +3795,25 @@ void OpDispatchBuilder::PSLL(OpcodeArgs) {
|
||||
StoreResult(FPRClass, Op, Result, -1);
|
||||
}
|
||||
|
||||
template<size_t ElementSize, bool Scalar, uint32_t SrcIndex>
|
||||
void OpDispatchBuilder::PSRAOp(OpcodeArgs) {
|
||||
OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[SrcIndex], Op->Flags, -1);
|
||||
OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags, -1);
|
||||
|
||||
auto Size = GetDstSize(Op);
|
||||
|
||||
OrderedNode *Result{};
|
||||
|
||||
if (Scalar) {
|
||||
Result = _VSShrS(Size, ElementSize, Dest, Src);
|
||||
}
|
||||
else {
|
||||
Result = _VSShr(Size, ElementSize, Dest, Src);
|
||||
}
|
||||
|
||||
StoreResult(FPRClass, Op, Result, -1);
|
||||
}
|
||||
|
||||
void OpDispatchBuilder::PSRLDQ(OpcodeArgs) {
|
||||
LogMan::Throw::A(Op->Src[1].TypeNone.Type == FEXCore::X86Tables::DecodedOperand::TYPE_LITERAL, "Src1 needs to be literal here");
|
||||
uint64_t Shift = Op->Src[1].TypeLiteral.Literal;
|
||||
@ -3874,7 +3882,7 @@ void OpDispatchBuilder::CVT(OpcodeArgs) {
|
||||
else
|
||||
Src = _UCVTF(Src, DstElementSize);
|
||||
|
||||
OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags, -1);
|
||||
OrderedNode *Dest = LoadSource_WithOpSize(FPRClass, Op, Op->Dest, 16, Op->Flags, -1);
|
||||
|
||||
Src = _VInsElement(16, DstElementSize, 0, 0, Dest, Src);
|
||||
|
||||
@ -3972,7 +3980,7 @@ template<size_t ElementSize, bool Scalar>
|
||||
void OpDispatchBuilder::VFCMPOp(OpcodeArgs) {
|
||||
auto Size = GetSrcSize(Op);
|
||||
OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, -1);
|
||||
OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags, -1);
|
||||
OrderedNode *Dest = LoadSource_WithOpSize(FPRClass, Op, Op->Dest, GetDstSize(Op), Op->Flags, -1);
|
||||
uint8_t CompType = Op->Src[1].TypeLiteral.Literal;
|
||||
|
||||
OrderedNode *Result{};
|
||||
@ -4002,7 +4010,7 @@ void OpDispatchBuilder::VFCMPOp(OpcodeArgs) {
|
||||
|
||||
if (Scalar) {
|
||||
// Insert the lower bits
|
||||
Result = _VInsElement(Size, ElementSize, 0, 0, Dest, Result);
|
||||
Result = _VInsElement(GetDstSize(Op), ElementSize, 0, 0, Dest, Result);
|
||||
}
|
||||
|
||||
StoreResult(FPRClass, Op, Result, -1);
|
||||
@ -4658,7 +4666,7 @@ void InstallOpcodeHandlers() {
|
||||
{0x5F, 1, &OpDispatchBuilder::VectorScalarALUOp<IR::OP_VFMAX, 8>},
|
||||
{0x70, 1, &OpDispatchBuilder::PSHUFDOp<2, true>},
|
||||
{0xC2, 1, &OpDispatchBuilder::VFCMPOp<8, true>},
|
||||
{0xF0, 1, &OpDispatchBuilder::UnimplementedOp},
|
||||
{0xF0, 1, &OpDispatchBuilder::MOVVectorOp},
|
||||
};
|
||||
|
||||
const std::vector<std::tuple<uint8_t, uint8_t, FEXCore::X86Tables::OpDispatchPtr>> OpSizeModOpTable = {
|
||||
@ -4707,7 +4715,7 @@ void InstallOpcodeHandlers() {
|
||||
{0x7E, 1, &OpDispatchBuilder::MOVDOp},
|
||||
{0x7F, 1, &OpDispatchBuilder::MOVUPSOp},
|
||||
{0xC2, 1, &OpDispatchBuilder::VFCMPOp<8, false>},
|
||||
{0xC4, 1, &OpDispatchBuilder::PINSROp<4>},
|
||||
{0xC4, 1, &OpDispatchBuilder::PINSROp<2>},
|
||||
{0xC6, 1, &OpDispatchBuilder::SHUFOp<8>},
|
||||
|
||||
{0xD4, 1, &OpDispatchBuilder::PADDQOp<8>},
|
||||
@ -4719,13 +4727,13 @@ void InstallOpcodeHandlers() {
|
||||
{0xDB, 1, &OpDispatchBuilder::VectorALUOp<IR::OP_VAND, 16>},
|
||||
{0xDE, 1, &OpDispatchBuilder::PMAXUOp<1>},
|
||||
{0xDF, 1, &OpDispatchBuilder::ANDNOp},
|
||||
{0xE1, 1, &OpDispatchBuilder::PSRAIOp<2>},
|
||||
{0xE2, 1, &OpDispatchBuilder::PSRAIOp<4>},
|
||||
{0xE1, 1, &OpDispatchBuilder::PSRAOp<2, true, 0>},
|
||||
{0xE2, 1, &OpDispatchBuilder::PSRAOp<4, true, 0>},
|
||||
{0xE7, 1, &OpDispatchBuilder::MOVVectorOp},
|
||||
{0xEA, 1, &OpDispatchBuilder::PMINSWOp},
|
||||
{0xEB, 1, &OpDispatchBuilder::VectorALUOp<IR::OP_VOR, 16>},
|
||||
{0xEC, 1, &OpDispatchBuilder::VectorALUOp<IR::OP_VQADD, 1>},
|
||||
{0xED, 1, &OpDispatchBuilder::VectorALUOp<IR::OP_VQADD, 2>},
|
||||
{0xEC, 1, &OpDispatchBuilder::VectorALUOp<IR::OP_VSQADD, 1>},
|
||||
{0xED, 1, &OpDispatchBuilder::VectorALUOp<IR::OP_VSQADD, 2>},
|
||||
{0xEE, 1, &OpDispatchBuilder::VectorALUOp<IR::OP_VSMAX, 2>},
|
||||
{0xEF, 1, &OpDispatchBuilder::VectorALUOp<IR::OP_VXOR, 16>},
|
||||
|
||||
|
@ -223,6 +223,8 @@ public:
|
||||
void PSLLI(OpcodeArgs);
|
||||
template<size_t ElementSize, bool Scalar, uint32_t SrcIndex>
|
||||
void PSLL(OpcodeArgs);
|
||||
template<size_t ElementSize, bool Scalar, uint32_t SrcIndex>
|
||||
void PSRAOp(OpcodeArgs);
|
||||
void PSRLDQ(OpcodeArgs);
|
||||
void PSLLDQ(OpcodeArgs);
|
||||
template<size_t ElementSize>
|
||||
@ -332,11 +334,17 @@ public:
|
||||
IRPair<IROp_VSub> _VSub(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) {
|
||||
return _VSub(ssa0, ssa1, RegisterSize, ElementSize);
|
||||
}
|
||||
IRPair<IROp_VQAdd> _VQAdd(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) {
|
||||
return _VQAdd(ssa0, ssa1, RegisterSize, ElementSize);
|
||||
IRPair<IROp_VUQAdd> _VUQAdd(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) {
|
||||
return _VUQAdd(ssa0, ssa1, RegisterSize, ElementSize);
|
||||
}
|
||||
IRPair<IROp_VQSub> _VQSub(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) {
|
||||
return _VQSub(ssa0, ssa1, RegisterSize, ElementSize);
|
||||
IRPair<IROp_VUQSub> _VUQSub(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) {
|
||||
return _VUQSub(ssa0, ssa1, RegisterSize, ElementSize);
|
||||
}
|
||||
IRPair<IROp_VSQAdd> _VSQAdd(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) {
|
||||
return _VSQAdd(ssa0, ssa1, RegisterSize, ElementSize);
|
||||
}
|
||||
IRPair<IROp_VSQSub> _VSQSub(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) {
|
||||
return _VSQSub(ssa0, ssa1, RegisterSize, ElementSize);
|
||||
}
|
||||
IRPair<IROp_VUMin> _VUMin(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) {
|
||||
return _VUMin(ssa0, ssa1, RegisterSize, ElementSize);
|
||||
@ -392,6 +400,9 @@ public:
|
||||
IRPair<IROp_VUShr> _VUShr(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) {
|
||||
return _VUShr(ssa0, ssa1, RegisterSize, ElementSize);
|
||||
}
|
||||
IRPair<IROp_VSShr> _VSShr(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) {
|
||||
return _VSShr(ssa0, ssa1, RegisterSize, ElementSize);
|
||||
}
|
||||
IRPair<IROp_VExtr> _VExtr(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1, uint8_t Index) {
|
||||
return _VExtr(ssa0, ssa1, RegisterSize, ElementSize, Index);
|
||||
}
|
||||
|
@ -492,7 +492,7 @@ void InitializeSecondaryTables() {
|
||||
{0xC0, 2, X86InstInfo{"", TYPE_COPY_OTHER, FLAGS_NONE, 0, nullptr}},
|
||||
{0xC2, 1, X86InstInfo{"CMPPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}},
|
||||
{0xC3, 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}},
|
||||
{0xC4, 1, X86InstInfo{"PINSRW", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_SF_SRC_GPR | FLAGS_XMM_FLAGS, 1, nullptr}},
|
||||
{0xC4, 1, X86InstInfo{"PINSRW", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_16BIT) | FLAGS_MODRM | FLAGS_SF_SRC_GPR | FLAGS_XMM_FLAGS, 1, nullptr}},
|
||||
{0xC5, 1, X86InstInfo{"PEXTRW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_REG_ONLY | FLAGS_SF_DST_GPR | FLAGS_XMM_FLAGS, 1, nullptr}},
|
||||
{0xC6, 1, X86InstInfo{"SHUFPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}},
|
||||
{0xC7, 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}},
|
||||
|
@ -591,21 +591,21 @@
|
||||
"SplatVector2": {
|
||||
"HasDest": true,
|
||||
"NumElements": "2",
|
||||
"DestSize": "GetOpSize(ssa0)",
|
||||
"DestSize": "GetOpSize(ssa0) * 2",
|
||||
"SSAArgs": "1"
|
||||
},
|
||||
|
||||
"SplatVector3": {
|
||||
"HasDest": true,
|
||||
"NumElements": "3",
|
||||
"DestSize": "GetOpSize(ssa0)",
|
||||
"DestSize": "GetOpSize(ssa0) * 3",
|
||||
"SSAArgs": "1"
|
||||
},
|
||||
|
||||
"SplatVector4": {
|
||||
"HasDest": true,
|
||||
"NumElements": "4",
|
||||
"DestSize": "GetOpSize(ssa0)",
|
||||
"DestSize": "GetOpSize(ssa0) * 4",
|
||||
"SSAArgs": "1"
|
||||
},
|
||||
|
||||
@ -654,7 +654,7 @@
|
||||
]
|
||||
},
|
||||
|
||||
"VQAdd": {
|
||||
"VUQAdd": {
|
||||
"HasDest": true,
|
||||
"SSAArgs": "2",
|
||||
"Args": [
|
||||
@ -663,7 +663,25 @@
|
||||
]
|
||||
},
|
||||
|
||||
"VQSub": {
|
||||
"VUQSub": {
|
||||
"HasDest": true,
|
||||
"SSAArgs": "2",
|
||||
"Args": [
|
||||
"uint8_t", "RegisterSize",
|
||||
"uint8_t", "ElementSize"
|
||||
]
|
||||
},
|
||||
|
||||
"VSQAdd": {
|
||||
"HasDest": true,
|
||||
"SSAArgs": "2",
|
||||
"Args": [
|
||||
"uint8_t", "RegisterSize",
|
||||
"uint8_t", "ElementSize"
|
||||
]
|
||||
},
|
||||
|
||||
"VSQSub": {
|
||||
"HasDest": true,
|
||||
"SSAArgs": "2",
|
||||
"Args": [
|
||||
@ -906,6 +924,15 @@
|
||||
]
|
||||
},
|
||||
|
||||
"VSShr": {
|
||||
"HasDest": true,
|
||||
"SSAArgs": "2",
|
||||
"Args": [
|
||||
"uint8_t", "RegisterSize",
|
||||
"uint8_t", "ElementSize"
|
||||
]
|
||||
},
|
||||
|
||||
"VUShlS": {
|
||||
"HasDest": true,
|
||||
"SSAArgs": "2",
|
||||
|
Loading…
Reference in New Issue
Block a user