Merge pull request #3800 from Sonicadvance1/fix_vmovlhps

AVX128: Fixes vmovlhps
This commit is contained in:
Ryan Houdek 2024-07-01 14:41:43 -07:00 committed by GitHub
commit e2d4010b59
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 65 additions and 4 deletions

View File

@ -856,7 +856,13 @@ void OpDispatchBuilder::AVX128_VMOVLP(OpcodeArgs) {
void OpDispatchBuilder::AVX128_VMOVHP(OpcodeArgs) {
auto Src1 = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, false);
if (Op->Dest.IsGPR()) {
if (!Op->Dest.IsGPR()) {
///< VMOVHPS/PD mem64, xmm1
// Need to store Bits[127:64]. Use a vector element store.
auto Dest = LoadSource_WithOpSize(GPRClass, Op, Op->Dest, OpSize::i64Bit, Op->Flags, {.LoadData = false});
_VStoreVectorElement(OpSize::i128Bit, OpSize::i64Bit, Src1.Low, 1, Dest);
} else if (!Op->Src[1].IsGPR()) {
///< VMOVHPS/PD xmm2, xmm1, mem64
auto Src2 = LoadSource_WithOpSize(GPRClass, Op, Op->Src[1], OpSize::i64Bit, Op->Flags, {.LoadData = false});
// Bits[63:0] come from Src1[63:0]
@ -866,9 +872,13 @@ void OpDispatchBuilder::AVX128_VMOVHP(OpcodeArgs) {
AVX128_StoreResult_WithOpSize(Op, Op->Dest, RefPair {.Low = Result_Low, .High = ZeroVector});
} else {
// Need to store Bits[127:64]. Use a vector element store.
auto Dest = LoadSource_WithOpSize(GPRClass, Op, Op->Dest, OpSize::i64Bit, Op->Flags, {.LoadData = false});
_VStoreVectorElement(OpSize::i128Bit, OpSize::i64Bit, Src1.Low, 1, Dest);
// VMOVLHPS xmm1, xmm2, xmm3
auto Src2 = AVX128_LoadSource_WithOpSize(Op, Op->Src[1], Op->Flags, false);
Ref Result_Low = _VZip(OpSize::i128Bit, OpSize::i64Bit, Src1.Low, Src2.Low);
Ref ZeroVector = LoadZeroVector(OpSize::i128Bit);
AVX128_StoreResult_WithOpSize(Op, Op->Dest, RefPair {.Low = Result_Low, .High = ZeroVector});
}
}

View File

@ -0,0 +1,28 @@
%ifdef CONFIG
{
"HostFeatures": ["AVX"],
"RegData": {
"XMM0": ["0x43cc1ad6970b4549", "0xc4be43cc1ad6970b", "0", "0"],
"XMM1": ["0x43cc1ad6970b4549", "0xbd7eb46a1278f793", "0xef673dac6e4cbb7b", "0x5b3d85d342718be9"],
"XMM2": ["0xc4be43cc1ad6970b", "0x4549bd7eb46a1278", "0xf793ef673dac6e4c", "0xbb7b5b3d85d34271"]
}
}
%endif
; Register-register VMOVLHPS test: xmm0 = { xmm1[63:0], xmm2[63:0] }, with
; bits [255:128] of ymm0 zeroed (see the expected XMM0 in the CONFIG header).

; rdx points at the 32-byte aligned data table below.
lea rdx, [rel .data]
; Preload ymm0 with a pattern that differs from both sources so a stale
; destination is detectable, then load the two source registers.
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]
vmovapd ymm2, [rdx + 64]
; Low 64 bits come from xmm1's low qword, high 64 bits from xmm2's low qword.
vmovlhps xmm0, xmm1, xmm2
; NOTE(review): presumably hlt ends the test and the harness then compares
; registers against RegData -- confirm against the test runner.
hlt
; The vmovapd ymm loads above require 32-byte aligned memory operands.
align 32
.data:
; Row 0 -> ymm0 (initial destination contents, overwritten by the test)
dq 0xfdecd28fab3fa4a5, 0x7d7ccd8836d09fc2, 0xccdbcfc31f3ff0f3, 0x108390defebac4be
; Row 1 -> ymm1 (first source, supplies result bits [63:0])
dq 0x43cc1ad6970b4549, 0xbd7eb46a1278f793, 0xef673dac6e4cbb7b, 0x5b3d85d342718be9
; Row 2 -> ymm2 (second source, supplies result bits [127:64])
dq 0xc4be43cc1ad6970b, 0x4549bd7eb46a1278, 0xf793ef673dac6e4c, 0xbb7b5b3d85d34271
; Row 3 is not loaded by any instruction in this test.
dq 0x000043cc1ad6970b, 0x4549bd7eb46a1278, 0xf793ef673dac6e4c, 0xbb7b5b3d85d34271

View File

@ -455,6 +455,17 @@
"str q2, [x28, #16]"
]
},
"vmovlhps xmm0, xmm1, xmm1": {
"ExpectedInstructionCount": 3,
"Comment": [
"Map 1 0b00 0x16 128-bit"
],
"ExpectedArm64ASM": [
"zip1 v16.2d, v17.2d, v17.2d",
"movi v2.2d, #0x0",
"str q2, [x28, #16]"
]
},
"vmovshdup xmm0, [rax]": {
"ExpectedInstructionCount": 4,
"Comment": [

View File

@ -420,6 +420,18 @@
"mov v16.d[1], v3.d[0]"
]
},
"vmovlhps xmm0, xmm1, xmm1": {
"ExpectedInstructionCount": 4,
"Comment": [
"Map 1 0b00 0x16 128-bit"
],
"ExpectedArm64ASM": [
"mov v2.8b, v17.8b",
"mov v3.8b, v17.8b",
"mov v16.16b, v2.16b",
"mov v16.d[1], v3.d[0]"
]
},
"vmovshdup xmm0, [rax]": {
"ExpectedInstructionCount": 2,
"Comment": [