mirror of
https://github.com/FEX-Emu/FEX.git
synced 2025-01-07 14:10:23 +00:00
fb7167c2d2
We had failed to enable these implementations for the `ExtendedMemOperand` helpers. We had already implemented the non-helper forms, which are already tested in CI. These helpers just weren't updated? Noticed this when running libaom's SSE4.1 tests, where it managed to execute a pmovzxbq instruction with reg+reg memory source and was breaking the test results. There are /very/ few vector register operations that access only 8-bit or 16-bit in vectors so this flew under the radar for quite a while. Fixes their unit tests. Also adds a unittest using sse4.1 pmovzxbq to ensure we support the reg+reg case, and also a few other instructions to test 8-bit and 16-bit vector loads and stores.
52 lines
1.2 KiB
NASM
52 lines
1.2 KiB
NASM
%ifdef CONFIG
|
|
{
|
|
"RegData": {
|
|
"XMM0": ["0x0000000000000078", "0x0000000000000077"],
|
|
"XMM1": ["0x0000000000000078", "0x0000000000000077"],
|
|
"XMM2": ["0x0000000000000078", "0x0000000000000077"],
|
|
"XMM3": ["0x0000000000000078", "0x7800000000000077"],
|
|
"XMM4": ["0x0000000000000078", "0"],
|
|
"XMM5": ["0x0000000000000078", "0"]
|
|
}
|
|
}
|
|
%endif
|
|
|
|
; FEX-Emu had a bug with vector loadstore instructions where 16-bit and 8-bit vector loadstores with reg+reg source would assert in the code emitter.
|
|
; This affected both vector loads and stores. SSE 8-bit and 16-bit are quite uncommon so this isn't encountered frequently.
|
|
; Tests a few different instructions that access 16-bit and 8-bit with loads and stores.
|
|
lea rax, [rel .data]
|
|
lea rbx, [rel .data_temp]
|
|
mov rcx, 0
|
|
jmp .test
|
|
.test:
|
|
|
|
; 16-bit loads
|
|
pmovzxbq xmm0, [rax + rcx]
|
|
pmovzxbq xmm1, [rax + rcx*2]
|
|
pmovzxbq xmm2, [rax + rcx*4]
|
|
pmovzxbq xmm3, [rax + rcx*8]
|
|
|
|
; 8-bit load
|
|
pinsrb xmm3, [rax + rcx], 1111b
|
|
|
|
; 8-bit store
|
|
pextrb [rbx + rcx], xmm0, 0
|
|
|
|
; Load the result back
|
|
movaps xmm4, [rbx + rcx]
|
|
|
|
; 16-bit store
|
|
pextrb [rbx + rcx], xmm0, 0
|
|
|
|
; Load the result back
|
|
movaps xmm5, [rbx + rcx]
|
|
|
|
hlt
|
|
align 32
|
|
.data:
|
|
dq 0x7172737475767778
|
|
dq 0x4142434445464748
|
|
|
|
.data_temp:
|
|
dq 0,0
|