Merge pull request #3755 from Sonicadvance1/fix_avx128_vmovntdqa

AVX128: Fix vmovntdqa failing to zero the upper 128 bits
This commit is contained in:
Ryan Houdek 2024-06-24 19:14:48 -07:00 committed by GitHub
commit 7ff96227c0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 51 additions and 1 deletions

View File

@ -724,6 +724,10 @@ void OpDispatchBuilder::AVX128_MOVVectorNT(OpcodeArgs) {
const auto Is128Bit = SrcSize == Core::CPUState::XMM_SSE_REG_SIZE;
auto Src = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, !Is128Bit, MemoryAccessType::STREAM);
if (Op->Dest.IsGPR() && Is128Bit) {
Src.High = LoadZeroVector(OpSize::i128Bit);
}
AVX128_StoreResult_WithOpSize(Op, Op->Dest, Src);
}

View File

@ -23,7 +23,15 @@ mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax
; Load results with random data first.
vmovaps ymm0, [rel .data_random]
vmovaps ymm1, [rel .data_random]
vmovntdqa xmm0, [rdx]
vmovntdqa ymm1, [rdx]
hlt
align 32
.data_random:
dd 83.0999,69.50512,41.02678,13.05881,5.35242,21.9932,9.67383,5.32372,29.02872,66.50151,19.30764,91.3633,40.45086,50.96153,32.64489,23.97574,90.64316,24.22547,98.9394,91.21715,90.80143,99.48407,64.97245,74.39838,35.22761,25.35321,5.8732,90.19956,33.03133,52.02952,58.38554,10.17531,47.84703,84.04831,90.02965,65.81329,96.27991,6.64479,25.58971,95.00694,88.1929,37.16964,49.52602,10.27223,77.70605,20.21439,9.8056,41.29389,15.4071,57.54286,9.61117,55.54302,52.90745,4.88086,72.52882,3.0201,56.55091,71.22749,61.84736,88.74295,47.72641,24.17404,33.70564,96.71303

View File

@ -3,7 +3,11 @@
"HostFeatures": ["AVX"],
"RegData": {
"XMM0": ["0x6162636465666768", "0x7172737475767778", "0x0000000000000000", "0x0000000000000000"],
"XMM3": ["0x4142434445464748", "0x5152535455565758", "0x6162636465666768", "0x7172737475767778"]
"XMM3": ["0x4142434445464748", "0x5152535455565758", "0x6162636465666768", "0x7172737475767778"],
"XMM4": ["0x428b029f42a63326", "0x4150f0e342241b6c", "0", "0"],
"XMM5": ["0x428b029f42a63326", "0x4150f0e342241b6c", "0x41aff21340ab4706", "0x40aa5bea411ac802"],
"XMM6": ["0x428b029f42a63326", "0x4150f0e342241b6c", "0x41aff21340ab4706", "0x40aa5bea411ac802"],
"XMM7": ["0x428b029f42a63326", "0x4150f0e342241b6c", "0", "0"]
},
"MemoryRegions": {
"0x100000000": "4096"
@ -39,4 +43,38 @@ vmovaps xmm0, [rdx + 8 * 4]
vmovntpd [rdx + 8 * 4], ymm2
vmovaps ymm3, [rdx + 8 * 4]
vmovaps ymm4, [rel .data_random]
vmovaps ymm5, [rel .data_random]
vmovaps ymm6, [rel .data_random]
vmovaps ymm7, [rel .data_random]
vmovntpd [rel .data_res1], xmm4
vmovaps xmm4, [rel .data_res1]
vmovntpd [rel .data_res2], xmm5
vmovaps ymm5, [rel .data_res2]
vmovntpd [rel .data_res3], ymm6
vmovaps ymm6, [rel .data_res3]
vmovntpd [rel .data_res4], ymm7
vmovaps xmm7, [rel .data_res4]
hlt
align 32
.data_random:
dd 83.0999,69.50512,41.02678,13.05881,5.35242,21.9932,9.67383,5.32372,29.02872,66.50151,19.30764,91.3633,40.45086,50.96153,32.64489,23.97574,90.64316,24.22547,98.9394,91.21715,90.80143,99.48407,64.97245,74.39838,35.22761,25.35321,5.8732,90.19956,33.03133,52.02952,58.38554,10.17531,47.84703,84.04831,90.02965,65.81329,96.27991,6.64479,25.58971,95.00694,88.1929,37.16964,49.52602,10.27223,77.70605,20.21439,9.8056,41.29389,15.4071,57.54286,9.61117,55.54302,52.90745,4.88086,72.52882,3.0201,56.55091,71.22749,61.84736,88.74295,47.72641,24.17404,33.70564,96.71303
align 32
.data_res1:
dd 83.0999,69.50512,41.02678,13.05881,5.35242,21.9932,9.67383,5.32372,29.02872,66.50151,19.30764,91.3633,40.45086,50.96153,32.64489,23.97574,90.64316,24.22547,98.9394,91.21715,90.80143,99.48407,64.97245,74.39838,35.22761,25.35321,5.8732,90.19956,33.03133,52.02952,58.38554,10.17531,47.84703,84.04831,90.02965,65.81329,96.27991,6.64479,25.58971,95.00694,88.1929,37.16964,49.52602,10.27223,77.70605,20.21439,9.8056,41.29389,15.4071,57.54286,9.61117,55.54302,52.90745,4.88086,72.52882,3.0201,56.55091,71.22749,61.84736,88.74295,47.72641,24.17404,33.70564,96.71303
.data_res2:
dd 83.0999,69.50512,41.02678,13.05881,5.35242,21.9932,9.67383,5.32372,29.02872,66.50151,19.30764,91.3633,40.45086,50.96153,32.64489,23.97574,90.64316,24.22547,98.9394,91.21715,90.80143,99.48407,64.97245,74.39838,35.22761,25.35321,5.8732,90.19956,33.03133,52.02952,58.38554,10.17531,47.84703,84.04831,90.02965,65.81329,96.27991,6.64479,25.58971,95.00694,88.1929,37.16964,49.52602,10.27223,77.70605,20.21439,9.8056,41.29389,15.4071,57.54286,9.61117,55.54302,52.90745,4.88086,72.52882,3.0201,56.55091,71.22749,61.84736,88.74295,47.72641,24.17404,33.70564,96.71303
.data_res3:
dd 83.0999,69.50512,41.02678,13.05881,5.35242,21.9932,9.67383,5.32372,29.02872,66.50151,19.30764,91.3633,40.45086,50.96153,32.64489,23.97574,90.64316,24.22547,98.9394,91.21715,90.80143,99.48407,64.97245,74.39838,35.22761,25.35321,5.8732,90.19956,33.03133,52.02952,58.38554,10.17531,47.84703,84.04831,90.02965,65.81329,96.27991,6.64479,25.58971,95.00694,88.1929,37.16964,49.52602,10.27223,77.70605,20.21439,9.8056,41.29389,15.4071,57.54286,9.61117,55.54302,52.90745,4.88086,72.52882,3.0201,56.55091,71.22749,61.84736,88.74295,47.72641,24.17404,33.70564,96.71303
.data_res4:
dd 83.0999,69.50512,41.02678,13.05881,5.35242,21.9932,9.67383,5.32372,29.02872,66.50151,19.30764,91.3633,40.45086,50.96153,32.64489,23.97574,90.64316,24.22547,98.9394,91.21715,90.80143,99.48407,64.97245,74.39838,35.22761,25.35321,5.8732,90.19956,33.03133,52.02952,58.38554,10.17531,47.84703,84.04831,90.02965,65.81329,96.27991,6.64479,25.58971,95.00694,88.1929,37.16964,49.52602,10.27223,77.70605,20.21439,9.8056,41.29389,15.4071,57.54286,9.61117,55.54302,52.90745,4.88086,72.52882,3.0201,56.55091,71.22749,61.84736,88.74295,47.72641,24.17404,33.70564,96.71303