Merge pull request #3756 from Sonicadvance1/fix_vmovhlps

Fix VMOVLHPS instruction
This commit is contained in:
Ryan Houdek 2024-06-24 19:14:56 -07:00 committed by GitHub
commit 3a310b8815
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 68 additions and 17 deletions

View File

@ -748,17 +748,26 @@ void OpDispatchBuilder::AVX128_MOVQ(OpcodeArgs) {
void OpDispatchBuilder::AVX128_VMOVLP(OpcodeArgs) {
auto Src1 = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, false);
if (Op->Dest.IsGPR()) {
auto Src2 = AVX128_LoadSource_WithOpSize(Op, Op->Src[1], Op->Flags, false);
if (!Op->Dest.IsGPR()) {
///< VMOVLPS/PD mem64, xmm1
StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Src1.Low, OpSize::i64Bit, OpSize::i64Bit);
} else if (!Op->Src[1].IsGPR()) {
///< VMOVLPS/PD xmm1, xmm2, mem64
// Bits[63:0] come from Src2[63:0]
// Bits[127:64] come from Src1[127:64]
Ref Result_Low = _VInsElement(OpSize::i128Bit, OpSize::i64Bit, 1, 1, Src2.Low, Src1.Low);
auto Src2 = LoadSource_WithOpSize(FPRClass, Op, Op->Src[1], OpSize::i64Bit, Op->Flags);
Ref Result_Low = _VInsElement(OpSize::i128Bit, OpSize::i64Bit, 1, 1, Src2, Src1.Low);
Ref ZeroVector = LoadZeroVector(OpSize::i128Bit);
AVX128_StoreResult_WithOpSize(Op, Op->Dest, RefPair {.Low = Result_Low, .High = ZeroVector});
} else {
StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Src1.Low, OpSize::i64Bit, OpSize::i64Bit);
///< VMOVHLPS/PD xmm1, xmm2, xmm3
auto Src2 = AVX128_LoadSource_WithOpSize(Op, Op->Src[1], Op->Flags, false);
Ref Result_Low = _VInsElement(OpSize::i128Bit, OpSize::i64Bit, 0, 1, Src1.Low, Src2.Low);
Ref ZeroVector = LoadZeroVector(OpSize::i128Bit);
AVX128_StoreResult_WithOpSize(Op, Op->Dest, RefPair {.Low = Result_Low, .High = ZeroVector});
}
}

View File

@ -132,15 +132,23 @@ void OpDispatchBuilder::MOVLPOp(OpcodeArgs) {
}
void OpDispatchBuilder::VMOVLPOp(OpcodeArgs) {
if (Op->Dest.IsGPR()) {
Ref Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, {.Align = 16});
Ref Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags, {.Align = 8});
Ref Result = _VInsElement(16, 8, 0, 0, Src1, Src2);
Ref Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, {.Align = 16});
if (!Op->Dest.IsGPR()) {
///< VMOVLPS/PD mem64, xmm1
StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Src1, 8, 8);
} else if (!Op->Src[1].IsGPR()) {
///< VMOVLPS/PD xmm1, xmm2, mem64
// Bits[63:0] come from Src2[63:0]
// Bits[127:64] come from Src1[127:64]
Ref Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags, {.Align = 8});
Ref Result = _VInsElement(OpSize::i128Bit, OpSize::i64Bit, 1, 1, Src2, Src1);
StoreResult(FPRClass, Op, Result, -1);
} else {
Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, {.Align = 8});
StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Src, 8, 8);
///< VMOVHLPS/PD xmm1, xmm2, xmm3
Ref Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags, {.Align = 16});
Ref Result = _VInsElement(OpSize::i128Bit, OpSize::i64Bit, 0, 1, Src1, Src2);
StoreResult(FPRClass, Op, Result, -1);
}
}

View File

@ -27,8 +27,8 @@ std::array<X86InstInfo, MAX_VEX_TABLE_SIZE> VEXTableOps = []() consteval {
{OPD(1, 0b10, 0x11), 1, X86InstInfo{"VMOVSS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0, nullptr}},
{OPD(1, 0b11, 0x11), 1, X86InstInfo{"VMOVSD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0, nullptr}},
{OPD(1, 0b00, 0x12), 1, X86InstInfo{"VMOVLPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY | FLAGS_XMM_FLAGS | FLAGS_VEX_1ST_SRC, 0, nullptr}},
{OPD(1, 0b01, 0x12), 1, X86InstInfo{"VMOVLPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY | FLAGS_XMM_FLAGS | FLAGS_VEX_1ST_SRC, 0, nullptr}},
{OPD(1, 0b00, 0x12), 1, X86InstInfo{"VMOVLPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_VEX_1ST_SRC, 0, nullptr}},
{OPD(1, 0b01, 0x12), 1, X86InstInfo{"VMOVLPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_VEX_1ST_SRC, 0, nullptr}},
{OPD(1, 0b10, 0x12), 1, X86InstInfo{"VMOVSLDUP", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}},
{OPD(1, 0b11, 0x12), 1, X86InstInfo{"VMOVDDUP", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}},

View File

@ -0,0 +1,34 @@
%ifdef CONFIG
{
"HostFeatures": ["AVX"],
"RegData": {
"XMM1": ["0x4150f0e342241b6c", "0xdddddddddddddddd", "0x0000000000000000", "0x0000000000000000"],
"XMM2": ["0xCCCCCCCCCCCCCCCC", "0xDDDDDDDDDDDDDDDD", "0xEEEEEEEEEEEEEEEE", "0xFFFFFFFFFFFFFFFF"],
"XMM3": ["0x4150f0e342241b6c", "0xdddddddddddddddd", "0x0000000000000000", "0x0000000000000000"],
"XMM5": ["0x428b029f42a63326", "0x4150f0e342241b6c", "0x41aff21340ab4706", "0x40aa5bea411ac802"],
"XMM6": ["0x428b029f42a63326", "0x4150f0e342241b6c", "0x41aff21340ab4706", "0x40aa5bea411ac802"]
}
}
%endif
; Load inputs
vmovapd ymm1, [rel .data]
vmovapd ymm2, [rel .data + 32]
vmovapd ymm5, [rel .data_random]
vmovapd ymm6, [rel .data_random]
vmovhlps xmm1, xmm2, xmm5
vmovhlps xmm3, xmm1, xmm5
hlt
align 32
.data:
db 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
db 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
db 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD
db 0xEE, 0xEE, 0xEE, 0xEE, 0xEE, 0xEE, 0xEE, 0xEE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
.data_random:
dd 83.0999,69.50512,41.02678,13.05881,5.35242,21.9932,9.67383,5.32372,29.02872,66.50151,19.30764,91.3633,40.45086,50.96153,32.64489,23.97574,90.64316,24.22547,98.9394,91.21715,90.80143,99.48407,64.97245,74.39838,35.22761,25.35321,5.8732,90.19956,33.03133,52.02952,58.38554,10.17531,47.84703,84.04831,90.02965,65.81329,96.27991,6.64479,25.58971,95.00694,88.1929,37.16964,49.52602,10.27223,77.70605,20.21439,9.8056,41.29389,15.4071,57.54286,9.61117,55.54302,52.90745,4.88086,72.52882,3.0201,56.55091,71.22749,61.84736,88.74295,47.72641,24.17404,33.70564,96.71303

View File

@ -216,8 +216,8 @@
],
"ExpectedArm64ASM": [
"ldr q2, [x4]",
"mov v16.16b, v17.16b",
"mov v16.d[0], v2.d[0]"
"mov v16.16b, v2.16b",
"mov v16.d[1], v17.d[1]"
]
},
"vmovlpd xmm0, xmm1, [rax]": {
@ -228,8 +228,8 @@
],
"ExpectedArm64ASM": [
"ldr q2, [x4]",
"mov v16.16b, v17.16b",
"mov v16.d[0], v2.d[0]"
"mov v16.16b, v2.16b",
"mov v16.d[1], v17.d[1]"
]
},
"vmovsldup xmm0, [rax]": {