Various fixes that unit tests have found

- Distinguishes between unsigned and signed saturating ops (see the sketch after this list)
- Fixes vector splat
- Fixes the VUMULL/VSMULL element count calculation
- Fixes undefined behaviour in a couple of vector shift ops
- Implements VSSHR
- Fixes VExtr to calculate its offset correctly
- Fixes VInsGPR to calculate its offset correctly
- Fixes the non-immediate PSRA x86 ops
- Fixes scalar vector unary ops to correctly insert the lower bits
- Fixes conversion instructions to correctly insert the lower bits
- Fixes PINSRW to use the correct element size
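
As context for the saturating-op and shift bullets above, here is a minimal, self-contained sketch of the element-wise semantics the interpreter lambdas in this commit implement: unsigned and signed saturating add, and shifts that stay well defined when the shift count reaches the element width. The helper names (SatAddU, SatAddS, ShrGuarded, SarGuarded) are illustrative only and are not part of the FEXCore API.

#include <cstdint>
#include <cstdio>
#include <limits>
#include <type_traits>

// Unsigned saturating add: on wrap-around, clamp to the type's maximum.
template <typename T>
T SatAddU(T a, T b) {
  static_assert(std::is_unsigned_v<T>);
  T res = static_cast<T>(a + b);
  return res < a ? std::numeric_limits<T>::max() : res;
}

// Signed saturating add: clamp to min/max without ever overflowing in C++.
template <typename T>
T SatAddS(T a, T b) {
  static_assert(std::is_signed_v<T>);
  if (b > 0 && a > std::numeric_limits<T>::max() - b) return std::numeric_limits<T>::max();
  if (b < 0 && a < std::numeric_limits<T>::min() - b) return std::numeric_limits<T>::min();
  return static_cast<T>(a + b);
}

// Logical right shift guarded against oversized shift counts: shifting by the
// element width or more yields 0 instead of undefined behaviour.
template <typename T>
T ShrGuarded(T a, unsigned shift) {
  static_assert(std::is_unsigned_v<T>);
  return shift >= sizeof(T) * 8 ? T{0} : static_cast<T>(a >> shift);
}

// Arithmetic right shift with the same guard: an oversized shift replicates the
// sign bit (assumes the usual arithmetic behaviour of >> on negative values).
template <typename T>
T SarGuarded(T a, unsigned shift) {
  static_assert(std::is_signed_v<T>);
  unsigned Bits = sizeof(T) * 8;
  return shift >= Bits ? static_cast<T>(a >> (Bits - 1)) : static_cast<T>(a >> shift);
}

int main() {
  printf("%d\n", static_cast<int>(SatAddU<uint8_t>(250, 10)));    // 255: unsigned saturate
  printf("%d\n", static_cast<int>(SatAddS<int8_t>(120, 10)));     // 127: signed saturate
  printf("%d\n", static_cast<int>(ShrGuarded<uint8_t>(0x80, 9))); // 0: oversized shift is defined
  printf("%d\n", static_cast<int>(SarGuarded<int8_t>(-128, 9)));  // -1: oversized shift sign-fills
  return 0;
}

Clamping an oversized arithmetic shift to width-1 reproduces the sign-fill result that PSRAW/PSRAD produce for large shift counts, which is the behaviour the new VSSHR path needs.
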
Ryan Houdek 2020-03-05 01:07:20 -08:00 committed by Stefanos Kornilios Mitsis Poiitidis
parent 9b21caa647
commit 9c363fb317
5 changed files with 206 additions and 51 deletions

@@ -12,6 +12,7 @@
#include <atomic>
#include <cmath>
#include <limits>
#include <vector>
namespace FEXCore::CPU {
@@ -1855,10 +1856,11 @@ void InterpreterCore::ExecuteCode(FEXCore::Core::InternalThreadState *Thread) {
auto *Dst_d = reinterpret_cast<type*>(Tmp); \
auto *Src_d = reinterpret_cast<type*>(Src); \
for (uint8_t i = 0; i < Elements; ++i) \
Dst_d[i] = *Src_d;\
break; \
}
switch (Op->Header.Size) {
uint8_t ElementSize = OpSize / Elements;
switch (ElementSize) {
CREATE_VECTOR(1, uint8_t)
CREATE_VECTOR(2, uint16_t)
CREATE_VECTOR(4, uint32_t)
@@ -1952,6 +1954,17 @@ void InterpreterCore::ExecuteCode(FEXCore::Core::InternalThreadState *Thread) {
} \
break; \
}
#define DO_VECTOR_SAT_OP(size, type, func, min, max) \
case size: { \
auto *Dst_d = reinterpret_cast<type*>(Tmp); \
auto *Src1_d = reinterpret_cast<type*>(Src1); \
auto *Src2_d = reinterpret_cast<type*>(Src2); \
for (uint8_t i = 0; i < Elements; ++i) { \
Dst_d[i] = func(Src1_d[i], Src2_d[i], min, max); \
} \
break; \
}
case IR::OP_VUSHRI: {
auto Op = IROp->C<IR::IROp_VUShrI>();
void *Src = GetSrc<void*>(Op->Header.Args[0]);
@@ -2048,6 +2061,82 @@ void InterpreterCore::ExecuteCode(FEXCore::Core::InternalThreadState *Thread) {
memcpy(GDP, Tmp, Op->RegisterSize);
break;
}
case IR::OP_VUQADD: {
auto Op = IROp->C<IR::IROp_VUQAdd>();
void *Src1 = GetSrc<void*>(Op->Header.Args[0]);
void *Src2 = GetSrc<void*>(Op->Header.Args[1]);
uint8_t Tmp[16];
uint8_t Elements = Op->RegisterSize / Op->ElementSize;
auto Func = [](auto a, auto b) {
decltype(a) res = a + b;
return res < a ? ~0U : res;
};
switch (Op->ElementSize) {
DO_VECTOR_OP(1, uint8_t, Func)
DO_VECTOR_OP(2, uint16_t, Func)
DO_VECTOR_OP(4, uint32_t, Func)
DO_VECTOR_OP(8, uint64_t, Func)
default: LogMan::Msg::A("Unknown Element Size: %d", Op->ElementSize); break;
}
memcpy(GDP, Tmp, Op->RegisterSize);
break;
}
case IR::OP_VUQSUB: {
auto Op = IROp->C<IR::IROp_VUQSub>();
void *Src1 = GetSrc<void*>(Op->Header.Args[0]);
void *Src2 = GetSrc<void*>(Op->Header.Args[1]);
uint8_t Tmp[16];
uint8_t Elements = Op->RegisterSize / Op->ElementSize;
auto Func = [](auto a, auto b) {
decltype(a) res = a - b;
return res > a ? 0U : res;
};
switch (Op->ElementSize) {
DO_VECTOR_OP(1, uint8_t, Func)
DO_VECTOR_OP(2, uint16_t, Func)
DO_VECTOR_OP(4, uint32_t, Func)
DO_VECTOR_OP(8, uint64_t, Func)
default: LogMan::Msg::A("Unknown Element Size: %d", Op->ElementSize); break;
}
memcpy(GDP, Tmp, Op->RegisterSize);
break;
}
case IR::OP_VSQADD: {
auto Op = IROp->C<IR::IROp_VSQAdd>();
void *Src1 = GetSrc<void*>(Op->Header.Args[0]);
void *Src2 = GetSrc<void*>(Op->Header.Args[1]);
uint8_t Tmp[16];
uint8_t Elements = Op->RegisterSize / Op->ElementSize;
auto Func = [](auto a, auto b) {
decltype(a) res = a + b;
if (a > 0) {
if (b > (std::numeric_limits<decltype(a)>::max() - a)) {
return std::numeric_limits<decltype(a)>::max();
}
}
else if (b < (std::numeric_limits<decltype(a)>::min() - a)) {
return std::numeric_limits<decltype(a)>::min();
}
return res;
};
switch (Op->ElementSize) {
DO_VECTOR_OP(1, int8_t, Func)
DO_VECTOR_OP(2, int16_t, Func)
DO_VECTOR_OP(4, int32_t, Func)
DO_VECTOR_OP(8, int64_t, Func)
default: LogMan::Msg::A("Unknown Element Size: %d", Op->ElementSize); break;
}
memcpy(GDP, Tmp, Op->RegisterSize);
break;
}
case IR::OP_VFADD: {
auto Op = IROp->C<IR::IROp_VFAdd>();
void *Src1 = GetSrc<void*>(Op->Header.Args[0]);
@@ -2300,7 +2389,7 @@ void InterpreterCore::ExecuteCode(FEXCore::Core::InternalThreadState *Thread) {
uint8_t Tmp[16];
uint8_t Elements = Op->RegisterSize / Op->ElementSize;
uint8_t Elements = Op->RegisterSize / (Op->ElementSize << 1);
auto Func = [](auto a, auto b) { return a * b; };
switch (Op->ElementSize) {
@@ -2319,7 +2408,7 @@ void InterpreterCore::ExecuteCode(FEXCore::Core::InternalThreadState *Thread) {
uint8_t Tmp[16];
uint8_t Elements = Op->RegisterSize / Op->ElementSize;
uint8_t Elements = Op->RegisterSize / (Op->ElementSize << 1);
auto Func = [](auto a, auto b) { return a * b; };
switch (Op->ElementSize) {
@@ -2478,7 +2567,7 @@ void InterpreterCore::ExecuteCode(FEXCore::Core::InternalThreadState *Thread) {
uint8_t Tmp[16];
uint8_t Elements = Op->RegisterSize / Op->ElementSize;
auto Func = [](auto a, auto b) { return a << b; };
auto Func = [](auto a, auto b) { return b >= (sizeof(a) * 8) ? 0 : a << b; };
switch (Op->ElementSize) {
DO_VECTOR_OP(1, uint8_t, Func)
@@ -2490,6 +2579,26 @@ void InterpreterCore::ExecuteCode(FEXCore::Core::InternalThreadState *Thread) {
memcpy(GDP, Tmp, Op->RegisterSize);
break;
}
case IR::OP_VSSHR: {
auto Op = IROp->C<IR::IROp_VSShr>();
void *Src1 = GetSrc<void*>(Op->Header.Args[0]);
void *Src2 = GetSrc<void*>(Op->Header.Args[1]);
uint8_t Tmp[16];
uint8_t Elements = Op->RegisterSize / Op->ElementSize;
auto Func = [](auto a, auto b) { return b >= (sizeof(a) * 8) ? (a >> (sizeof(a) * 8 - 1)) : a >> b; };
switch (Op->ElementSize) {
DO_VECTOR_OP(1, int8_t, Func)
DO_VECTOR_OP(2, int16_t, Func)
DO_VECTOR_OP(4, int32_t, Func)
DO_VECTOR_OP(8, int64_t, Func)
default: LogMan::Msg::A("Unknown Element Size: %d", Op->ElementSize); break;
}
memcpy(GDP, Tmp, Op->RegisterSize);
break;
}
case IR::OP_VUSHLS: {
auto Op = IROp->C<IR::IROp_VUShlS>();
void *Src1 = GetSrc<void*>(Op->Header.Args[0]);
@@ -2497,7 +2606,7 @@ void InterpreterCore::ExecuteCode(FEXCore::Core::InternalThreadState *Thread) {
uint8_t Tmp[16];
uint8_t Elements = Op->RegisterSize / Op->ElementSize;
auto Func = [](auto a, auto b) { return a << b; };
auto Func = [](auto a, auto b) { return b >= (sizeof(a) * 8) ? 0 : a << b; };
switch (Op->ElementSize) {
DO_VECTOR_SCALAR_OP(1, uint8_t, Func)
@@ -2517,7 +2626,7 @@ void InterpreterCore::ExecuteCode(FEXCore::Core::InternalThreadState *Thread) {
uint8_t Tmp[16];
uint8_t Elements = Op->RegisterSize / Op->ElementSize;
auto Func = [](auto a, auto b) { return a >> b; };
auto Func = [](auto a, auto b) { return b >= (sizeof(a) * 8) ? 0 : a >> b; };
switch (Op->ElementSize) {
DO_VECTOR_SCALAR_OP(1, uint8_t, Func)
@@ -2537,7 +2646,7 @@ void InterpreterCore::ExecuteCode(FEXCore::Core::InternalThreadState *Thread) {
uint8_t Tmp[16];
uint8_t Elements = Op->RegisterSize / Op->ElementSize;
auto Func = [](auto a, auto b) { return a >> b; };
auto Func = [](auto a, auto b) { return b >= (sizeof(a) * 8) ? (a >> (sizeof(a) * 8 - 1)) : a >> b; };
switch (Op->ElementSize) {
DO_VECTOR_SCALAR_OP(1, int8_t, Func)
@@ -2854,7 +2963,7 @@ void InterpreterCore::ExecuteCode(FEXCore::Core::InternalThreadState *Thread) {
__uint128_t Src1 = *GetSrc<__uint128_t*>(Op->Header.Args[0]);
__uint128_t Src2 = *GetSrc<__uint128_t*>(Op->Header.Args[1]);
uint64_t Offset = Op->Index * 8;
uint64_t Offset = Op->Index * Op->ElementSize * 8;
__uint128_t Dst = (Src1 << (sizeof(__uint128_t) * 8 - Offset)) | (Src2 >> Offset);
memcpy(GDP, &Dst, Op->RegisterSize);
@@ -2865,7 +2974,7 @@ void InterpreterCore::ExecuteCode(FEXCore::Core::InternalThreadState *Thread) {
__uint128_t Src1 = *GetSrc<__uint128_t*>(Op->Header.Args[0]);
__uint128_t Src2 = *GetSrc<__uint128_t*>(Op->Header.Args[1]);
uint64_t Offset = Op->Index * 8;
uint64_t Offset = Op->Index * Op->ElementSize * 8;
__uint128_t Mask = (1ULL << (Op->ElementSize * 8)) - 1;
Mask <<= Offset;
Mask = ~Mask;

@@ -2365,22 +2365,7 @@ template<FEXCore::IR::IROps IROp, size_t ElementSize>
void OpDispatchBuilder::VectorScalarALUOp(OpcodeArgs) {
auto Size = GetSrcSize(Op);
OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags, -1);
OrderedNode *Src{};
if (Op->Src[0].TypeNone.Type == FEXCore::X86Tables::DecodedOperand::TYPE_GPR) {
Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, -1);
}
else {
// Load a elementsize from memory and move it to a vector register
Src = LoadSource_WithOpSize(GPRClass, Op, Op->Src[0], ElementSize, Op->Flags, -1);
switch (ElementSize) {
case 4:
Src = _SplatVector4(Src);
break;
case 8:
Src = _SplatVector2(Src);
break;
}
}
OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, -1);
// If OpSize == ElementSize then it only does the lower scalar op
auto ALUOp = _VAdd(ElementSize, ElementSize, Dest, Src);
@@ -2406,10 +2391,14 @@ void OpDispatchBuilder::VectorUnaryOp(OpcodeArgs) {
// Overwrite our IR's op type
ALUOp.first->Header.Op = IROp;
// Insert the lower bits
auto Result = _VInsElement(Size, ElementSize, 0, 0, Dest, ALUOp);
StoreResult(FPRClass, Op, Result, -1);
if (Scalar) {
// Insert the lower bits
auto Result = _VInsElement(GetSrcSize(Op), ElementSize, 0, 0, Dest, ALUOp);
StoreResult(FPRClass, Op, Result, -1);
}
else {
StoreResult(FPRClass, Op, ALUOp, -1);
}
}
void OpDispatchBuilder::MOVQOp(OpcodeArgs) {
@@ -2549,10 +2538,10 @@ void OpDispatchBuilder::ANDNOp(OpcodeArgs) {
template<size_t ElementSize>
void OpDispatchBuilder::PINSROp(OpcodeArgs) {
auto Size = GetSrcSize(Op);
auto Size = GetDstSize(Op);
OrderedNode *Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, -1);
OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags, -1);
OrderedNode *Dest = LoadSource_WithOpSize(FPRClass, Op, Op->Dest, GetDstSize(Op), Op->Flags, -1);
LogMan::Throw::A(Op->Src[1].TypeNone.Type == FEXCore::X86Tables::DecodedOperand::TYPE_LITERAL, "Src1 needs to be literal here");
uint64_t Index = Op->Src[1].TypeLiteral.Literal;
@@ -3806,6 +3795,25 @@ void OpDispatchBuilder::PSLL(OpcodeArgs) {
StoreResult(FPRClass, Op, Result, -1);
}
template<size_t ElementSize, bool Scalar, uint32_t SrcIndex>
void OpDispatchBuilder::PSRAOp(OpcodeArgs) {
OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[SrcIndex], Op->Flags, -1);
OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags, -1);
auto Size = GetDstSize(Op);
OrderedNode *Result{};
if (Scalar) {
Result = _VSShrS(Size, ElementSize, Dest, Src);
}
else {
Result = _VSShr(Size, ElementSize, Dest, Src);
}
StoreResult(FPRClass, Op, Result, -1);
}
void OpDispatchBuilder::PSRLDQ(OpcodeArgs) {
LogMan::Throw::A(Op->Src[1].TypeNone.Type == FEXCore::X86Tables::DecodedOperand::TYPE_LITERAL, "Src1 needs to be literal here");
uint64_t Shift = Op->Src[1].TypeLiteral.Literal;
@@ -3874,7 +3882,7 @@ void OpDispatchBuilder::CVT(OpcodeArgs) {
else
Src = _UCVTF(Src, DstElementSize);
OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags, -1);
OrderedNode *Dest = LoadSource_WithOpSize(FPRClass, Op, Op->Dest, 16, Op->Flags, -1);
Src = _VInsElement(16, DstElementSize, 0, 0, Dest, Src);
@@ -3972,7 +3980,7 @@ template<size_t ElementSize, bool Scalar>
void OpDispatchBuilder::VFCMPOp(OpcodeArgs) {
auto Size = GetSrcSize(Op);
OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, -1);
OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags, -1);
OrderedNode *Dest = LoadSource_WithOpSize(FPRClass, Op, Op->Dest, GetDstSize(Op), Op->Flags, -1);
uint8_t CompType = Op->Src[1].TypeLiteral.Literal;
OrderedNode *Result{};
@@ -4002,7 +4010,7 @@ void OpDispatchBuilder::VFCMPOp(OpcodeArgs) {
if (Scalar) {
// Insert the lower bits
Result = _VInsElement(Size, ElementSize, 0, 0, Dest, Result);
Result = _VInsElement(GetDstSize(Op), ElementSize, 0, 0, Dest, Result);
}
StoreResult(FPRClass, Op, Result, -1);
@@ -4658,7 +4666,7 @@ void InstallOpcodeHandlers() {
{0x5F, 1, &OpDispatchBuilder::VectorScalarALUOp<IR::OP_VFMAX, 8>},
{0x70, 1, &OpDispatchBuilder::PSHUFDOp<2, true>},
{0xC2, 1, &OpDispatchBuilder::VFCMPOp<8, true>},
{0xF0, 1, &OpDispatchBuilder::UnimplementedOp},
{0xF0, 1, &OpDispatchBuilder::MOVVectorOp},
};
const std::vector<std::tuple<uint8_t, uint8_t, FEXCore::X86Tables::OpDispatchPtr>> OpSizeModOpTable = {
@@ -4707,7 +4715,7 @@ void InstallOpcodeHandlers() {
{0x7E, 1, &OpDispatchBuilder::MOVDOp},
{0x7F, 1, &OpDispatchBuilder::MOVUPSOp},
{0xC2, 1, &OpDispatchBuilder::VFCMPOp<8, false>},
{0xC4, 1, &OpDispatchBuilder::PINSROp<4>},
{0xC4, 1, &OpDispatchBuilder::PINSROp<2>},
{0xC6, 1, &OpDispatchBuilder::SHUFOp<8>},
{0xD4, 1, &OpDispatchBuilder::PADDQOp<8>},
@@ -4719,13 +4727,13 @@
{0xDB, 1, &OpDispatchBuilder::VectorALUOp<IR::OP_VAND, 16>},
{0xDE, 1, &OpDispatchBuilder::PMAXUOp<1>},
{0xDF, 1, &OpDispatchBuilder::ANDNOp},
{0xE1, 1, &OpDispatchBuilder::PSRAIOp<2>},
{0xE2, 1, &OpDispatchBuilder::PSRAIOp<4>},
{0xE1, 1, &OpDispatchBuilder::PSRAOp<2, true, 0>},
{0xE2, 1, &OpDispatchBuilder::PSRAOp<4, true, 0>},
{0xE7, 1, &OpDispatchBuilder::MOVVectorOp},
{0xEA, 1, &OpDispatchBuilder::PMINSWOp},
{0xEB, 1, &OpDispatchBuilder::VectorALUOp<IR::OP_VOR, 16>},
{0xEC, 1, &OpDispatchBuilder::VectorALUOp<IR::OP_VQADD, 1>},
{0xED, 1, &OpDispatchBuilder::VectorALUOp<IR::OP_VQADD, 2>},
{0xEC, 1, &OpDispatchBuilder::VectorALUOp<IR::OP_VSQADD, 1>},
{0xED, 1, &OpDispatchBuilder::VectorALUOp<IR::OP_VSQADD, 2>},
{0xEE, 1, &OpDispatchBuilder::VectorALUOp<IR::OP_VSMAX, 2>},
{0xEF, 1, &OpDispatchBuilder::VectorALUOp<IR::OP_VXOR, 16>},

@@ -223,6 +223,8 @@ public:
void PSLLI(OpcodeArgs);
template<size_t ElementSize, bool Scalar, uint32_t SrcIndex>
void PSLL(OpcodeArgs);
template<size_t ElementSize, bool Scalar, uint32_t SrcIndex>
void PSRAOp(OpcodeArgs);
void PSRLDQ(OpcodeArgs);
void PSLLDQ(OpcodeArgs);
template<size_t ElementSize>
@@ -332,11 +334,17 @@ public:
IRPair<IROp_VSub> _VSub(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) {
return _VSub(ssa0, ssa1, RegisterSize, ElementSize);
}
IRPair<IROp_VQAdd> _VQAdd(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) {
return _VQAdd(ssa0, ssa1, RegisterSize, ElementSize);
IRPair<IROp_VUQAdd> _VUQAdd(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) {
return _VUQAdd(ssa0, ssa1, RegisterSize, ElementSize);
}
IRPair<IROp_VQSub> _VQSub(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) {
return _VQSub(ssa0, ssa1, RegisterSize, ElementSize);
IRPair<IROp_VUQSub> _VUQSub(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) {
return _VUQSub(ssa0, ssa1, RegisterSize, ElementSize);
}
IRPair<IROp_VSQAdd> _VSQAdd(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) {
return _VSQAdd(ssa0, ssa1, RegisterSize, ElementSize);
}
IRPair<IROp_VSQSub> _VSQSub(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) {
return _VSQSub(ssa0, ssa1, RegisterSize, ElementSize);
}
IRPair<IROp_VUMin> _VUMin(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) {
return _VUMin(ssa0, ssa1, RegisterSize, ElementSize);
@@ -392,6 +400,9 @@ public:
IRPair<IROp_VUShr> _VUShr(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) {
return _VUShr(ssa0, ssa1, RegisterSize, ElementSize);
}
IRPair<IROp_VSShr> _VSShr(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1) {
return _VSShr(ssa0, ssa1, RegisterSize, ElementSize);
}
IRPair<IROp_VExtr> _VExtr(uint8_t RegisterSize, uint8_t ElementSize, OrderedNode *ssa0, OrderedNode *ssa1, uint8_t Index) {
return _VExtr(ssa0, ssa1, RegisterSize, ElementSize, Index);
}

@@ -492,7 +492,7 @@ void InitializeSecondaryTables() {
{0xC0, 2, X86InstInfo{"", TYPE_COPY_OTHER, FLAGS_NONE, 0, nullptr}},
{0xC2, 1, X86InstInfo{"CMPPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}},
{0xC3, 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}},
{0xC4, 1, X86InstInfo{"PINSRW", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_SF_SRC_GPR | FLAGS_XMM_FLAGS, 1, nullptr}},
{0xC4, 1, X86InstInfo{"PINSRW", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_16BIT) | FLAGS_MODRM | FLAGS_SF_SRC_GPR | FLAGS_XMM_FLAGS, 1, nullptr}},
{0xC5, 1, X86InstInfo{"PEXTRW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_REG_ONLY | FLAGS_SF_DST_GPR | FLAGS_XMM_FLAGS, 1, nullptr}},
{0xC6, 1, X86InstInfo{"SHUFPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}},
{0xC7, 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0, nullptr}},

@@ -591,21 +591,21 @@
"SplatVector2": {
"HasDest": true,
"NumElements": "2",
"DestSize": "GetOpSize(ssa0)",
"DestSize": "GetOpSize(ssa0) * 2",
"SSAArgs": "1"
},
"SplatVector3": {
"HasDest": true,
"NumElements": "3",
"DestSize": "GetOpSize(ssa0)",
"DestSize": "GetOpSize(ssa0) * 3",
"SSAArgs": "1"
},
"SplatVector4": {
"HasDest": true,
"NumElements": "4",
"DestSize": "GetOpSize(ssa0)",
"DestSize": "GetOpSize(ssa0) * 4",
"SSAArgs": "1"
},
@@ -654,7 +654,7 @@
]
},
"VQAdd": {
"VUQAdd": {
"HasDest": true,
"SSAArgs": "2",
"Args": [
@@ -663,7 +663,25 @@
]
},
"VQSub": {
"VUQSub": {
"HasDest": true,
"SSAArgs": "2",
"Args": [
"uint8_t", "RegisterSize",
"uint8_t", "ElementSize"
]
},
"VSQAdd": {
"HasDest": true,
"SSAArgs": "2",
"Args": [
"uint8_t", "RegisterSize",
"uint8_t", "ElementSize"
]
},
"VSQSub": {
"HasDest": true,
"SSAArgs": "2",
"Args": [
@@ -906,6 +924,15 @@
]
},
"VSShr": {
"HasDest": true,
"SSAArgs": "2",
"Args": [
"uint8_t", "RegisterSize",
"uint8_t", "ElementSize"
]
},
"VUShlS": {
"HasDest": true,
"SSAArgs": "2",