mirror of
https://github.com/FEX-Emu/FEX.git
synced 2025-01-23 06:56:31 +00:00
OpcodeDispatcher: Optimizes SSE movmaskps
This reduces the instruction's implementation from 17 instructions down to 5 or 6, depending on whether the host supports SVE. I would say this is now optimal.
This commit is contained in:
parent
8d110738ac
commit
514a8223d9
@ -14,6 +14,7 @@ constexpr static uint64_t NamedVectorConstants[FEXCore::IR::NamedVectorConstant:
|
||||
{0x0000'0000'8000'0000, 0x0000'0000'8000'0000}, // NAMED_VECTOR_PADDSUBPS_INVERT_UPPER
|
||||
{0x8000'0000'0000'0000, 0x0000'0000'0000'0000}, // NAMED_VECTOR_PADDSUBPD_INVERT
|
||||
{0x8000'0000'0000'0000, 0x0000'0000'0000'0000}, // NAMED_VECTOR_PADDSUBPD_INVERT_UPPER
|
||||
{0x0000'0001'0000'0000, 0x0000'0003'0000'0002}, // NAMED_VECTOR_MOVMSKPS_SHIFT
|
||||
};
|
||||
|
||||
constexpr static auto PSHUFLW_LUT {
|
||||
@ -130,12 +131,9 @@ CPUBackend::CPUBackend(FEXCore::Core::InternalThreadState *ThreadState, size_t I
|
||||
auto &Common = ThreadState->CurrentFrame->Pointers.Common;
|
||||
|
||||
// Initialize named vector constants.
|
||||
Common.NamedVectorConstantPointers[FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_INCREMENTAL_U16_INDEX] = reinterpret_cast<uint64_t>(NamedVectorConstants[0]);
|
||||
Common.NamedVectorConstantPointers[FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_INCREMENTAL_U16_INDEX_UPPER] = reinterpret_cast<uint64_t>(NamedVectorConstants[1]);
|
||||
Common.NamedVectorConstantPointers[FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_PADDSUBPS_INVERT] = reinterpret_cast<uint64_t>(NamedVectorConstants[2]);
|
||||
Common.NamedVectorConstantPointers[FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_PADDSUBPS_INVERT_UPPER] = reinterpret_cast<uint64_t>(NamedVectorConstants[3]);
|
||||
Common.NamedVectorConstantPointers[FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_PADDSUBPD_INVERT] = reinterpret_cast<uint64_t>(NamedVectorConstants[4]);
|
||||
Common.NamedVectorConstantPointers[FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_PADDSUBPD_INVERT_UPPER] = reinterpret_cast<uint64_t>(NamedVectorConstants[5]);
|
||||
for (size_t i = 0; i < FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_MAX; ++i) {
|
||||
Common.NamedVectorConstantPointers[i] = reinterpret_cast<uint64_t>(NamedVectorConstants[i]);
|
||||
}
|
||||
|
||||
// Initialize Indexed named vector constants.
|
||||
Common.IndexedNamedVectorConstantPointers[FEXCore::IR::IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_PSHUFLW] = reinterpret_cast<uint64_t>(PSHUFLW_LUT.data());
|
||||
|
@ -81,6 +81,16 @@ DEF_OP(LoadNamedVectorConstant) {
|
||||
|
||||
const auto Dst = GetVReg(Node);
|
||||
|
||||
if (HostSupportsSVE128) {
|
||||
switch (Op->Constant) {
|
||||
case FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_MOVMSKPS_SHIFT:
|
||||
index(ARMEmitter::SubRegSize::i32Bit, Dst.Z(), 0, 1);
|
||||
return;
|
||||
default:
|
||||
// Intentionally doing nothing.
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Load the pointer.
|
||||
ldr(TMP1, STATE_PTR(CpuStateFrame, Pointers.Common.NamedVectorConstantPointers[Op->Constant]));
|
||||
|
||||
|
@ -787,6 +787,19 @@ void OpDispatchBuilder::MOVMSKOp(OpcodeArgs) {
|
||||
GPR = _Lshr(GPR, _Constant(62));
|
||||
StoreResult_WithOpSize(GPRClass, Op, Op->Dest, GPR, CTX->GetGPRSize(), -1);
|
||||
}
|
||||
else if (Size == 16 && ElementSize == 4) {
|
||||
// Shift all the sign bits to the bottom of their respective elements.
|
||||
Src = _VUShrI(Size, 4, Src, 31);
|
||||
// Load the specific 128-bit movmskps shift elements operator.
|
||||
auto ConstantUSHL = LoadAndCacheNamedVectorConstant(Size, NAMED_VECTOR_MOVMSKPS_SHIFT);
|
||||
// Shift the sign bits in to specific locations.
|
||||
Src = _VUShl(Size, 4, Src, ConstantUSHL, false);
|
||||
// Add across the vector so the sign bits will end up in bits [3:0]
|
||||
Src = _VAddV(Size, 4, Src);
|
||||
// Extract to a GPR.
|
||||
OrderedNode *GPR = _VExtractToGPR(Size, 4, Src, 0);
|
||||
StoreResult_WithOpSize(GPRClass, Op, Op->Dest, GPR, CTX->GetGPRSize(), -1);
|
||||
}
|
||||
else {
|
||||
OrderedNode *CurrentVal = _Constant(0);
|
||||
|
||||
|
@ -523,6 +523,7 @@ enum NamedVectorConstant : uint8_t {
|
||||
NAMED_VECTOR_PADDSUBPS_INVERT_UPPER,
|
||||
NAMED_VECTOR_PADDSUBPD_INVERT,
|
||||
NAMED_VECTOR_PADDSUBPD_INVERT_UPPER,
|
||||
NAMED_VECTOR_MOVMSKPS_SHIFT,
|
||||
NAMED_VECTOR_MAX,
|
||||
};
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user