mirror of
https://github.com/FEX-Emu/FEX.git
synced 2025-02-08 23:57:05 +00:00
AVX128: Implement support for vmovddup
This instruction is a little weird. When accessing memory, the 128-bit operating size of the instruction only loads 64 bits, while the 256-bit operating size fetches a full 256 bits. Theoretically the hardware could get away with two 64-bit loads or a wacky 24-byte load, but it looks like, to simplify the hardware, they just spec'd the 256-bit version to always load the full range.
This commit is contained in:
parent
dbaf95a8f3
commit
96aafb4f07
@ -1006,6 +1006,7 @@ public:
|
||||
void AVX128_MOVQ(OpcodeArgs);
|
||||
void AVX128_VMOVLP(OpcodeArgs);
|
||||
void AVX128_VMOVHP(OpcodeArgs);
|
||||
void AVX128_VMOVDDUP(OpcodeArgs);
|
||||
|
||||
// End of AVX 128-bit implementation
|
||||
|
||||
|
@ -36,7 +36,7 @@ void OpDispatchBuilder::InstallAVX128Handlers() {
|
||||
{OPD(1, 0b00, 0x12), 1, &OpDispatchBuilder::AVX128_VMOVLP},
|
||||
{OPD(1, 0b01, 0x12), 1, &OpDispatchBuilder::AVX128_VMOVLP},
|
||||
// TODO: {OPD(1, 0b10, 0x12), 1, &OpDispatchBuilder::VMOVSLDUPOp},
|
||||
// TODO: {OPD(1, 0b11, 0x12), 1, &OpDispatchBuilder::VMOVDDUPOp},
|
||||
{OPD(1, 0b11, 0x12), 1, &OpDispatchBuilder::AVX128_VMOVDDUP},
|
||||
{OPD(1, 0b00, 0x13), 1, &OpDispatchBuilder::AVX128_VMOVLP},
|
||||
{OPD(1, 0b01, 0x13), 1, &OpDispatchBuilder::AVX128_VMOVLP},
|
||||
|
||||
@ -710,4 +710,39 @@ void OpDispatchBuilder::AVX128_VMOVHP(OpcodeArgs) {
|
||||
}
|
||||
}
|
||||
|
||||
// VMOVDDUP for the AVX128 path.
//
// 128-bit form: the low 64 bits of the source are duplicated across the low
// 128-bit half of the destination and the high half is written as a zero vector.
// 256-bit form: each 128-bit half of the destination receives its matching
// source half's low 64 bits, duplicated.
void OpDispatchBuilder::AVX128_VMOVDDUP(OpcodeArgs) {
  const bool Is256Bit = GetSrcSize(Op) != Core::CPUState::XMM_SSE_REG_SIZE;
  const bool IsMemorySrc = !Op->Src[0].IsGPR();

  RefPair Source {};
  if (IsMemorySrc && !Is256Bit) {
    // Memory operands are the odd case: the 128-bit operation only reads
    // 8 bytes, so load just that much in to the low half.
    Source.Low = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], OpSize::i64Bit, Op->Flags);
  } else {
    // Register sources, and 256-bit memory sources (which load a full
    // 32 bytes), go through the paired load. The final argument is true only
    // for the 256-bit operation (presumably it requests the upper half).
    Source = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, Is256Bit);
  }

  // Both operating sizes broadcast Source.Low[63:0] across the low 128 bits.
  Ref LowHalf = _VDupElement(OpSize::i128Bit, OpSize::i64Bit, Source.Low, 0);

  Ref HighHalf {};
  if (Is256Bit) {
    // Broadcast Source.High[63:0] across the high 128 bits.
    HighHalf = _VDupElement(OpSize::i128Bit, OpSize::i64Bit, Source.High, 0);
  } else {
    // The 128-bit operation stores zero in the upper half.
    HighHalf = LoadZeroVector(OpSize::i128Bit);
  }

  AVX128_StoreResult_WithOpSize(Op, Op->Dest, RefPair {.Low = LowHalf, .High = HighHalf});
}
|
||||
|
||||
} // namespace FEXCore::IR
|
||||
|
Loading…
x
Reference in New Issue
Block a user