CodeEmitter: Fixes vector {ldr,str}{b,h} with reg-reg source

We had failed to enable these implementations for the
`ExtendedMemOperand` helpers. We had already implemented the non-helper
forms, which are already tested in CI. These helpers just weren't
updated?

Noticed this when running libaom's SSE4.1 tests, where it managed to
execute a pmovzxbq instruction with reg+reg memory source and was
breaking the test results.

There are /very/ few vector register operations that access only 8-bit
or 16-bit in vectors so this flew under the radar for quite a while.

Fixes their unit tests.

Also adds a unittest using sse4.1 pmovzxbq to ensure we support the
reg+reg case, and also a few other instructions to test 8-bit and 16-bit
vector loads and stores.
This commit is contained in:
Ryan Houdek 2024-07-01 16:46:11 -07:00
parent d884eb9287
commit fb7167c2d2
No known key found for this signature in database
2 changed files with 55 additions and 10 deletions

View File

@ -3780,8 +3780,7 @@ public:
if (MemSrc.MetaType.Header.MemType == ARMEmitter::ExtendedMemOperand::Type::TYPE_EXTENDED &&
MemSrc.MetaType.ExtendedType.rm.Idx() != ARMEmitter::Reg::r31.Idx()) {
LOGMAN_THROW_AA_FMT(MemSrc.MetaType.ExtendedType.Shift == false, "Can't shift byte");
LOGMAN_MSG_A_FMT("Nope"); // XXX: Implement
// strb(rt, MemSrc.rn, MemSrc.MetaType.ExtendedType.rm, MemSrc.MetaType.ExtendedType.Option);
strb(rt, MemSrc.rn, MemSrc.MetaType.ExtendedType.rm, MemSrc.MetaType.ExtendedType.Option);
}
else if (MemSrc.MetaType.Header.MemType == ARMEmitter::ExtendedMemOperand::Type::TYPE_EXTENDED) {
strb(rt, MemSrc.rn);
@ -3811,8 +3810,7 @@ public:
if (MemSrc.MetaType.Header.MemType == ARMEmitter::ExtendedMemOperand::Type::TYPE_EXTENDED &&
MemSrc.MetaType.ExtendedType.rm.Idx() != ARMEmitter::Reg::r31.Idx()) {
LOGMAN_THROW_AA_FMT(MemSrc.MetaType.ExtendedType.Shift == false, "Can't shift byte");
LOGMAN_MSG_A_FMT("Nope"); // XXX: Implement
// ldrb(rt, MemSrc.rn, MemSrc.MetaType.ExtendedType.rm, MemSrc.MetaType.ExtendedType.Option);
ldrb(rt, MemSrc.rn, MemSrc.MetaType.ExtendedType.rm, MemSrc.MetaType.ExtendedType.Option);
}
else if (MemSrc.MetaType.Header.MemType == ARMEmitter::ExtendedMemOperand::Type::TYPE_EXTENDED) {
ldrb(rt, MemSrc.rn);
@ -3841,9 +3839,7 @@ public:
void strh(ARMEmitter::VRegister rt, ARMEmitter::ExtendedMemOperand MemSrc) {
if (MemSrc.MetaType.Header.MemType == ARMEmitter::ExtendedMemOperand::Type::TYPE_EXTENDED &&
MemSrc.MetaType.ExtendedType.rm.Idx() != ARMEmitter::Reg::r31.Idx()) {
LOGMAN_THROW_AA_FMT(MemSrc.MetaType.ExtendedType.Shift == false, "Can't shift byte");
LOGMAN_MSG_A_FMT("Nope"); // XXX: Implement
// strh(rt, MemSrc.rn, MemSrc.MetaType.ExtendedType.rm, MemSrc.MetaType.ExtendedType.Option);
strh(rt, MemSrc.rn, MemSrc.MetaType.ExtendedType.rm, MemSrc.MetaType.ExtendedType.Option, MemSrc.MetaType.ExtendedType.Shift);
}
else if (MemSrc.MetaType.Header.MemType == ARMEmitter::ExtendedMemOperand::Type::TYPE_EXTENDED) {
strh(rt, MemSrc.rn);
@ -3872,9 +3868,7 @@ public:
void ldrh(ARMEmitter::VRegister rt, ARMEmitter::ExtendedMemOperand MemSrc) {
if (MemSrc.MetaType.Header.MemType == ARMEmitter::ExtendedMemOperand::Type::TYPE_EXTENDED &&
MemSrc.MetaType.ExtendedType.rm.Idx() != ARMEmitter::Reg::r31.Idx()) {
LOGMAN_THROW_AA_FMT(MemSrc.MetaType.ExtendedType.Shift == false, "Can't shift byte");
LOGMAN_MSG_A_FMT("Nope"); // XXX: Implement
// ldrh(rt, MemSrc.rn, MemSrc.MetaType.ExtendedType.rm, MemSrc.MetaType.ExtendedType.Option);
ldrh(rt, MemSrc.rn, MemSrc.MetaType.ExtendedType.rm, MemSrc.MetaType.ExtendedType.Option, MemSrc.MetaType.ExtendedType.Shift);
}
else if (MemSrc.MetaType.Header.MemType == ARMEmitter::ExtendedMemOperand::Type::TYPE_EXTENDED) {
ldrh(rt, MemSrc.rn);

View File

@ -0,0 +1,51 @@
%ifdef CONFIG
{
"RegData": {
"XMM0": ["0x0000000000000078", "0x0000000000000077"],
"XMM1": ["0x0000000000000078", "0x0000000000000077"],
"XMM2": ["0x0000000000000078", "0x0000000000000077"],
"XMM3": ["0x0000000000000078", "0x7800000000000077"],
"XMM4": ["0x0000000000000078", "0"],
"XMM5": ["0x0000000000000078", "0"]
}
}
%endif
; FEX-Emu had a bug with vector loadstore instructions where 16-bit and 8-bit vector loadstores with reg+reg source would assert in the code emitter.
; This affected both vector loads and stores. SSE 8-bit and 16-bit are quite uncommon so this isn't encountered frequently.
; Tests a few different instructions that access 16-bit and 8-bit with loads and stores.
lea rax, [rel .data]
lea rbx, [rel .data_temp]
mov rcx, 0
jmp .test
.test:
; 16-bit loads
pmovzxbq xmm0, [rax + rcx]
pmovzxbq xmm1, [rax + rcx*2]
pmovzxbq xmm2, [rax + rcx*4]
pmovzxbq xmm3, [rax + rcx*8]
; 8-bit load
pinsrb xmm3, [rax + rcx], 1111b
; 8-bit store
pextrb [rbx + rcx], xmm0, 0
; Load the result back
movaps xmm4, [rbx + rcx]
; 16-bit store
pextrb [rbx + rcx], xmm0, 0
; Load the result back
movaps xmm5, [rbx + rcx]
hlt
align 32
.data:
dq 0x7172737475767778
dq 0x4142434445464748
.data_temp:
dq 0,0