mirror of
https://github.com/FEX-Emu/FEX.git
synced 2024-12-14 17:38:47 +00:00
ARMEmitter: Handle SVE Load Multiple Structures (scalar plus scalar) group
This commit is contained in:
parent
24d01cd8d2
commit
0176efa3bb
@ -2940,7 +2940,55 @@ public:
|
||||
}
|
||||
|
||||
// SVE load multiple structures (scalar plus scalar)
|
||||
// XXX:
|
||||
// LD2B (scalar plus scalar): zeroing-predicated load into the two-register list that starts at zt1.
// zt1 and zt2 must be sequential; only zt1 is handed to the encoder. rn and rm form the address.
void ld2b(ZRegister zt1, ZRegister zt2, PRegisterZero pg, Register rn, Register rm) {
  constexpr bool IsStore = false;
  // opc value shared by every two-register load in this group.
  constexpr uint32_t Opc = 0b01;

  LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2), "Registers need to be contiguous");
  SVEContiguousLoadStoreMultipleScalar(IsStore, SubRegSize::i8Bit, Opc, zt1, pg, rn, rm);
}
|
||||
// LD3B (scalar plus scalar): zeroing-predicated load into the three-register list that starts at zt1.
// zt1..zt3 must be sequential; only zt1 is handed to the encoder. rn and rm form the address.
void ld3b(ZRegister zt1, ZRegister zt2, ZRegister zt3, PRegisterZero pg, Register rn, Register rm) {
  constexpr bool IsStore = false;
  // opc value shared by every three-register load in this group.
  constexpr uint32_t Opc = 0b10;

  LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2, zt3), "Registers need to be contiguous");
  SVEContiguousLoadStoreMultipleScalar(IsStore, SubRegSize::i8Bit, Opc, zt1, pg, rn, rm);
}
|
||||
// LD4B (scalar plus scalar): zeroing-predicated load into the four-register list that starts at zt1.
// zt1..zt4 must be sequential; only zt1 is handed to the encoder. rn and rm form the address.
void ld4b(ZRegister zt1, ZRegister zt2, ZRegister zt3, ZRegister zt4, PRegisterZero pg, Register rn, Register rm) {
  constexpr bool IsStore = false;
  // opc value shared by every four-register load in this group.
  constexpr uint32_t Opc = 0b11;

  LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2, zt3, zt4), "Registers need to be contiguous");
  SVEContiguousLoadStoreMultipleScalar(IsStore, SubRegSize::i8Bit, Opc, zt1, pg, rn, rm);
}
|
||||
// LD2H (scalar plus scalar): zeroing-predicated load of 16-bit elements into the two-register list
// that starts at zt1. zt1 and zt2 must be sequential; only zt1 is handed to the encoder.
void ld2h(ZRegister zt1, ZRegister zt2, PRegisterZero pg, Register rn, Register rm) {
  constexpr bool IsStore = false;
  // opc value shared by every two-register load in this group.
  constexpr uint32_t Opc = 0b01;

  LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2), "Registers need to be contiguous");
  SVEContiguousLoadStoreMultipleScalar(IsStore, SubRegSize::i16Bit, Opc, zt1, pg, rn, rm);
}
|
||||
// LD3H (scalar plus scalar): zeroing-predicated load of 16-bit elements into the three-register list
// that starts at zt1. zt1..zt3 must be sequential; only zt1 is handed to the encoder.
void ld3h(ZRegister zt1, ZRegister zt2, ZRegister zt3, PRegisterZero pg, Register rn, Register rm) {
  constexpr bool IsStore = false;
  // opc value shared by every three-register load in this group.
  constexpr uint32_t Opc = 0b10;

  LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2, zt3), "Registers need to be contiguous");
  SVEContiguousLoadStoreMultipleScalar(IsStore, SubRegSize::i16Bit, Opc, zt1, pg, rn, rm);
}
|
||||
// LD4H (scalar plus scalar): zeroing-predicated load of 16-bit elements into the four-register list
// that starts at zt1. zt1..zt4 must be sequential; only zt1 is handed to the encoder.
void ld4h(ZRegister zt1, ZRegister zt2, ZRegister zt3, ZRegister zt4, PRegisterZero pg, Register rn, Register rm) {
  constexpr bool IsStore = false;
  // opc value shared by every four-register load in this group.
  constexpr uint32_t Opc = 0b11;

  LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2, zt3, zt4), "Registers need to be contiguous");
  SVEContiguousLoadStoreMultipleScalar(IsStore, SubRegSize::i16Bit, Opc, zt1, pg, rn, rm);
}
|
||||
// LD2W (scalar plus scalar): zeroing-predicated load of 32-bit elements into the two-register list
// that starts at zt1. zt1 and zt2 must be sequential; only zt1 is handed to the encoder.
void ld2w(ZRegister zt1, ZRegister zt2, PRegisterZero pg, Register rn, Register rm) {
  constexpr bool IsStore = false;
  // opc value shared by every two-register load in this group.
  constexpr uint32_t Opc = 0b01;

  LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2), "Registers need to be contiguous");
  SVEContiguousLoadStoreMultipleScalar(IsStore, SubRegSize::i32Bit, Opc, zt1, pg, rn, rm);
}
|
||||
// LD3W (scalar plus scalar): zeroing-predicated load of 32-bit elements into the three-register list
// that starts at zt1. zt1..zt3 must be sequential; only zt1 is handed to the encoder.
void ld3w(ZRegister zt1, ZRegister zt2, ZRegister zt3, PRegisterZero pg, Register rn, Register rm) {
  constexpr bool IsStore = false;
  // opc value shared by every three-register load in this group.
  constexpr uint32_t Opc = 0b10;

  LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2, zt3), "Registers need to be contiguous");
  SVEContiguousLoadStoreMultipleScalar(IsStore, SubRegSize::i32Bit, Opc, zt1, pg, rn, rm);
}
|
||||
// LD4W (scalar plus scalar): zeroing-predicated load of 32-bit elements into the four-register list
// that starts at zt1. zt1..zt4 must be sequential; only zt1 is handed to the encoder.
void ld4w(ZRegister zt1, ZRegister zt2, ZRegister zt3, ZRegister zt4, PRegisterZero pg, Register rn, Register rm) {
  constexpr bool IsStore = false;
  // opc value shared by every four-register load in this group.
  constexpr uint32_t Opc = 0b11;

  LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2, zt3, zt4), "Registers need to be contiguous");
  SVEContiguousLoadStoreMultipleScalar(IsStore, SubRegSize::i32Bit, Opc, zt1, pg, rn, rm);
}
|
||||
// LD2D (scalar plus scalar): zeroing-predicated load of 64-bit elements into the two-register list
// that starts at zt1. zt1 and zt2 must be sequential; only zt1 is handed to the encoder.
void ld2d(ZRegister zt1, ZRegister zt2, PRegisterZero pg, Register rn, Register rm) {
  constexpr bool IsStore = false;
  // opc value shared by every two-register load in this group.
  constexpr uint32_t Opc = 0b01;

  LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2), "Registers need to be contiguous");
  SVEContiguousLoadStoreMultipleScalar(IsStore, SubRegSize::i64Bit, Opc, zt1, pg, rn, rm);
}
|
||||
// LD3D (scalar plus scalar): zeroing-predicated load of 64-bit elements into the three-register list
// that starts at zt1. zt1..zt3 must be sequential; only zt1 is handed to the encoder.
void ld3d(ZRegister zt1, ZRegister zt2, ZRegister zt3, PRegisterZero pg, Register rn, Register rm) {
  constexpr bool IsStore = false;
  // opc value shared by every three-register load in this group.
  constexpr uint32_t Opc = 0b10;

  LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2, zt3), "Registers need to be contiguous");
  SVEContiguousLoadStoreMultipleScalar(IsStore, SubRegSize::i64Bit, Opc, zt1, pg, rn, rm);
}
|
||||
// LD4D (scalar plus scalar): zeroing-predicated load of 64-bit elements into the four-register list
// that starts at zt1. zt1..zt4 must be sequential; only zt1 is handed to the encoder.
void ld4d(ZRegister zt1, ZRegister zt2, ZRegister zt3, ZRegister zt4, PRegisterZero pg, Register rn, Register rm) {
  constexpr bool IsStore = false;
  // opc value shared by every four-register load in this group.
  constexpr uint32_t Opc = 0b11;

  LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2, zt3, zt4), "Registers need to be contiguous");
  SVEContiguousLoadStoreMultipleScalar(IsStore, SubRegSize::i64Bit, Opc, zt1, pg, rn, rm);
}
|
||||
|
||||
// SVE load and broadcast quadword (scalar plus immediate)
|
||||
// XXX:
|
||||
|
||||
@ -4349,6 +4397,26 @@ private:
|
||||
dc32(Instr);
|
||||
}
|
||||
|
||||
void SVEContiguousLoadStoreMultipleScalar(bool is_store, SubRegSize msz, uint32_t opc, ZRegister zt,
|
||||
PRegister pg, Register rn, Register rm) {
|
||||
LOGMAN_THROW_A_FMT(pg <= PReg::p7, "Can only use p0-p7 as a governing predicate");
|
||||
LOGMAN_THROW_A_FMT(rm != Reg::rsp, "rm cannot be the stack pointer");
|
||||
|
||||
uint32_t Instr = 0b1010'0100'0000'0000'1100'0000'0000'0000;
|
||||
if (is_store) {
|
||||
Instr |= 0x40006000U;
|
||||
} else {
|
||||
Instr |= 0x0000C000U;
|
||||
}
|
||||
Instr |= FEXCore::ToUnderlying(msz) << 23;
|
||||
Instr |= opc << 21;
|
||||
Instr |= rm.Idx() << 16;
|
||||
Instr |= pg.Idx() << 10;
|
||||
Instr |= rn.Idx() << 5;
|
||||
Instr |= zt.Idx();
|
||||
dc32(Instr);
|
||||
}
|
||||
|
||||
void SVEIndexGeneration(uint32_t op, SubRegSize size, ZRegister zd, int32_t imm5, int32_t imm5b) {
|
||||
LOGMAN_THROW_A_FMT(size != SubRegSize::i128Bit, "INDEX cannot use 128-bit element sizes");
|
||||
|
||||
|
29
External/FEXCore/unittests/Emitter/SVE_Tests.cpp
vendored
29
External/FEXCore/unittests/Emitter/SVE_Tests.cpp
vendored
@ -4073,6 +4073,35 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE Memory - 32-bit Gather and
|
||||
TEST_SINGLE(ldr(ZReg::z30, XReg::x29, -256), "ldr z30, [x29, #-256, mul vl]");
|
||||
TEST_SINGLE(ldr(ZReg::z30, XReg::x29, 255), "ldr z30, [x29, #255, mul vl]");
|
||||
}
|
||||
// Disassembly checks for the ld2/ld3/ld4 (scalar plus scalar) emitters.
// Every element size is exercised with a register list that wraps from z31 to z0
// and with a list that does not wrap (z26 onwards).
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE load multiple structures (scalar plus scalar)") {
  // Byte elements: the index register is printed without a shift.
  TEST_SINGLE(ld2b(ZReg::z31, ZReg::z0, PReg::p6.Zeroing(), Reg::r29, Reg::r30),
              "ld2b {z31.b, z0.b}, p6/z, [x29, x30]");
  TEST_SINGLE(ld2b(ZReg::z26, ZReg::z27, PReg::p6.Zeroing(), Reg::r29, Reg::r30),
              "ld2b {z26.b, z27.b}, p6/z, [x29, x30]");
  TEST_SINGLE(ld3b(ZReg::z31, ZReg::z0, ZReg::z1, PReg::p6.Zeroing(), Reg::r29, Reg::r30),
              "ld3b {z31.b, z0.b, z1.b}, p6/z, [x29, x30]");
  TEST_SINGLE(ld3b(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6.Zeroing(), Reg::r29, Reg::r30),
              "ld3b {z26.b, z27.b, z28.b}, p6/z, [x29, x30]");
  TEST_SINGLE(ld4b(ZReg::z31, ZReg::z0, ZReg::z1, ZReg::z2, PReg::p6.Zeroing(), Reg::r29, Reg::r30),
              "ld4b {z31.b, z0.b, z1.b, z2.b}, p6/z, [x29, x30]");
  TEST_SINGLE(ld4b(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6.Zeroing(), Reg::r29, Reg::r30),
              "ld4b {z26.b, z27.b, z28.b, z29.b}, p6/z, [x29, x30]");

  // Halfword elements: index printed with lsl #1.
  TEST_SINGLE(ld2h(ZReg::z31, ZReg::z0, PReg::p6.Zeroing(), Reg::r29, Reg::r30),
              "ld2h {z31.h, z0.h}, p6/z, [x29, x30, lsl #1]");
  TEST_SINGLE(ld2h(ZReg::z26, ZReg::z27, PReg::p6.Zeroing(), Reg::r29, Reg::r30),
              "ld2h {z26.h, z27.h}, p6/z, [x29, x30, lsl #1]");
  TEST_SINGLE(ld3h(ZReg::z31, ZReg::z0, ZReg::z1, PReg::p6.Zeroing(), Reg::r29, Reg::r30),
              "ld3h {z31.h, z0.h, z1.h}, p6/z, [x29, x30, lsl #1]");
  TEST_SINGLE(ld3h(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6.Zeroing(), Reg::r29, Reg::r30),
              "ld3h {z26.h, z27.h, z28.h}, p6/z, [x29, x30, lsl #1]");
  TEST_SINGLE(ld4h(ZReg::z31, ZReg::z0, ZReg::z1, ZReg::z2, PReg::p6.Zeroing(), Reg::r29, Reg::r30),
              "ld4h {z31.h, z0.h, z1.h, z2.h}, p6/z, [x29, x30, lsl #1]");
  TEST_SINGLE(ld4h(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6.Zeroing(), Reg::r29, Reg::r30),
              "ld4h {z26.h, z27.h, z28.h, z29.h}, p6/z, [x29, x30, lsl #1]");

  // Word elements: index printed with lsl #2.
  TEST_SINGLE(ld2w(ZReg::z31, ZReg::z0, PReg::p6.Zeroing(), Reg::r29, Reg::r30),
              "ld2w {z31.s, z0.s}, p6/z, [x29, x30, lsl #2]");
  TEST_SINGLE(ld2w(ZReg::z26, ZReg::z27, PReg::p6.Zeroing(), Reg::r29, Reg::r30),
              "ld2w {z26.s, z27.s}, p6/z, [x29, x30, lsl #2]");
  TEST_SINGLE(ld3w(ZReg::z31, ZReg::z0, ZReg::z1, PReg::p6.Zeroing(), Reg::r29, Reg::r30),
              "ld3w {z31.s, z0.s, z1.s}, p6/z, [x29, x30, lsl #2]");
  TEST_SINGLE(ld3w(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6.Zeroing(), Reg::r29, Reg::r30),
              "ld3w {z26.s, z27.s, z28.s}, p6/z, [x29, x30, lsl #2]");
  TEST_SINGLE(ld4w(ZReg::z31, ZReg::z0, ZReg::z1, ZReg::z2, PReg::p6.Zeroing(), Reg::r29, Reg::r30),
              "ld4w {z31.s, z0.s, z1.s, z2.s}, p6/z, [x29, x30, lsl #2]");
  TEST_SINGLE(ld4w(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6.Zeroing(), Reg::r29, Reg::r30),
              "ld4w {z26.s, z27.s, z28.s, z29.s}, p6/z, [x29, x30, lsl #2]");

  // Doubleword elements: index printed with lsl #3.
  TEST_SINGLE(ld2d(ZReg::z31, ZReg::z0, PReg::p6.Zeroing(), Reg::r29, Reg::r30),
              "ld2d {z31.d, z0.d}, p6/z, [x29, x30, lsl #3]");
  TEST_SINGLE(ld2d(ZReg::z26, ZReg::z27, PReg::p6.Zeroing(), Reg::r29, Reg::r30),
              "ld2d {z26.d, z27.d}, p6/z, [x29, x30, lsl #3]");
  TEST_SINGLE(ld3d(ZReg::z31, ZReg::z0, ZReg::z1, PReg::p6.Zeroing(), Reg::r29, Reg::r30),
              "ld3d {z31.d, z0.d, z1.d}, p6/z, [x29, x30, lsl #3]");
  TEST_SINGLE(ld3d(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6.Zeroing(), Reg::r29, Reg::r30),
              "ld3d {z26.d, z27.d, z28.d}, p6/z, [x29, x30, lsl #3]");
  TEST_SINGLE(ld4d(ZReg::z31, ZReg::z0, ZReg::z1, ZReg::z2, PReg::p6.Zeroing(), Reg::r29, Reg::r30),
              "ld4d {z31.d, z0.d, z1.d, z2.d}, p6/z, [x29, x30, lsl #3]");
  TEST_SINGLE(ld4d(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6.Zeroing(), Reg::r29, Reg::r30),
              "ld4d {z26.d, z27.d, z28.d, z29.d}, p6/z, [x29, x30, lsl #3]");
}
|
||||
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE load multiple structures (scalar plus immediate)") {
|
||||
TEST_SINGLE(ld2b(ZReg::z31, ZReg::z0, PReg::p6.Zeroing(), Reg::r29, 0), "ld2b {z31.b, z0.b}, p6/z, [x29]");
|
||||
TEST_SINGLE(ld2b(ZReg::z26, ZReg::z27, PReg::p6.Zeroing(), Reg::r29, 0), "ld2b {z26.b, z27.b}, p6/z, [x29]");
|
||||
|
Loading…
Reference in New Issue
Block a user