ARMEmitter: Handle SVE load and broadcast quadword (scalar plus imm) category

This commit is contained in:
Lioncache 2023-08-17 12:58:26 -04:00
parent cfc6368064
commit 6f9bc1e2fe
2 changed files with 83 additions and 1 deletions

View File

@ -3011,7 +3011,30 @@ public:
}
// SVE load and broadcast quadword (scalar plus immediate)
// XXX:
// LD1RQB (scalar plus immediate): quadword load-and-broadcast with .b elements, addressed as [rn, #imm].
// imm must be a multiple of 16 within [-128, 112]; the shared encoder checks this via LOGMAN_THROW_AA_FMT.
void ld1rqb(ZRegister zt, PRegisterZero pg, Register rn, int imm = 0) {
  constexpr uint32_t msz = 0b00; // element-size field for the "b" form
  constexpr uint32_t ssz = 0b00; // "rq" form: immediate is scaled by 16
  SVELoadBroadcastQuadScalarPlusImm(msz, ssz, zt, pg, rn, imm);
}
// LD1ROB (scalar plus immediate): "ro" counterpart of LD1RQB with .b elements, addressed as [rn, #imm].
// imm must be a multiple of 32 within [-256, 224]; the shared encoder checks this via LOGMAN_THROW_AA_FMT.
void ld1rob(ZRegister zt, PRegisterZero pg, Register rn, int imm = 0) {
  constexpr uint32_t msz = 0b00; // element-size field for the "b" form
  constexpr uint32_t ssz = 0b01; // "ro" form: immediate is scaled by 32
  SVELoadBroadcastQuadScalarPlusImm(msz, ssz, zt, pg, rn, imm);
}
// LD1RQH (scalar plus immediate): quadword load-and-broadcast with .h elements, addressed as [rn, #imm].
// imm must be a multiple of 16 within [-128, 112]; the shared encoder checks this via LOGMAN_THROW_AA_FMT.
void ld1rqh(ZRegister zt, PRegisterZero pg, Register rn, int imm = 0) {
  constexpr uint32_t msz = 0b01; // element-size field for the "h" form
  constexpr uint32_t ssz = 0b00; // "rq" form: immediate is scaled by 16
  SVELoadBroadcastQuadScalarPlusImm(msz, ssz, zt, pg, rn, imm);
}
// LD1ROH (scalar plus immediate): "ro" counterpart of LD1RQH with .h elements, addressed as [rn, #imm].
// imm must be a multiple of 32 within [-256, 224]; the shared encoder checks this via LOGMAN_THROW_AA_FMT.
void ld1roh(ZRegister zt, PRegisterZero pg, Register rn, int imm = 0) {
  constexpr uint32_t msz = 0b01; // element-size field for the "h" form
  constexpr uint32_t ssz = 0b01; // "ro" form: immediate is scaled by 32
  SVELoadBroadcastQuadScalarPlusImm(msz, ssz, zt, pg, rn, imm);
}
// LD1RQW (scalar plus immediate): quadword load-and-broadcast with .s elements, addressed as [rn, #imm].
// imm must be a multiple of 16 within [-128, 112]; the shared encoder checks this via LOGMAN_THROW_AA_FMT.
void ld1rqw(ZRegister zt, PRegisterZero pg, Register rn, int imm = 0) {
  constexpr uint32_t msz = 0b10; // element-size field for the "w" form
  constexpr uint32_t ssz = 0b00; // "rq" form: immediate is scaled by 16
  SVELoadBroadcastQuadScalarPlusImm(msz, ssz, zt, pg, rn, imm);
}
// LD1ROW (scalar plus immediate): "ro" counterpart of LD1RQW with .s elements, addressed as [rn, #imm].
// imm must be a multiple of 32 within [-256, 224]; the shared encoder checks this via LOGMAN_THROW_AA_FMT.
void ld1row(ZRegister zt, PRegisterZero pg, Register rn, int imm = 0) {
  constexpr uint32_t msz = 0b10; // element-size field for the "w" form
  constexpr uint32_t ssz = 0b01; // "ro" form: immediate is scaled by 32
  SVELoadBroadcastQuadScalarPlusImm(msz, ssz, zt, pg, rn, imm);
}
// LD1RQD (scalar plus immediate): quadword load-and-broadcast with .d elements, addressed as [rn, #imm].
// imm must be a multiple of 16 within [-128, 112]; the shared encoder checks this via LOGMAN_THROW_AA_FMT.
void ld1rqd(ZRegister zt, PRegisterZero pg, Register rn, int imm = 0) {
  constexpr uint32_t msz = 0b11; // element-size field for the "d" form
  constexpr uint32_t ssz = 0b00; // "rq" form: immediate is scaled by 16
  SVELoadBroadcastQuadScalarPlusImm(msz, ssz, zt, pg, rn, imm);
}
// LD1ROD (scalar plus immediate): "ro" counterpart of LD1RQD with .d elements, addressed as [rn, #imm].
// imm must be a multiple of 32 within [-256, 224]; the shared encoder checks this via LOGMAN_THROW_AA_FMT.
void ld1rod(ZRegister zt, PRegisterZero pg, Register rn, int imm = 0) {
  constexpr uint32_t msz = 0b11; // element-size field for the "d" form
  constexpr uint32_t ssz = 0b01; // "ro" form: immediate is scaled by 32
  SVELoadBroadcastQuadScalarPlusImm(msz, ssz, zt, pg, rn, imm);
}
// SVE contiguous load (scalar plus immediate)
template<SubRegSize size>
@ -4487,6 +4510,30 @@ private:
dc32(Instr);
}
// Shared encoder for the "SVE load and broadcast quadword (scalar plus immediate)" group
// (the ld1rq*/ld1ro* wrappers).
//
//   msz - placed at bit 23 of the instruction word.
//   ssz - placed at bit 21; also selects the immediate scale (16 << ssz).
//   zt  - destination vector register, bits [4:0].
//   pg  - governing predicate, placed at bit 10; must be p0-p7.
//   rn  - base address register, placed at bit 5.
//   imm - byte offset; must be a multiple of the scale and lie within [-8 * scale, 7 * scale].
//         It is stored at bit 16 as a 4-bit two's-complement count of scale-sized steps.
void SVELoadBroadcastQuadScalarPlusImm(uint32_t msz, uint32_t ssz, ZRegister zt,
                                       PRegister pg, Register rn, int imm) {
  LOGMAN_THROW_A_FMT(pg <= PReg::p7, "Can only use p0-p7 as a governing predicate");

  // 16 for the ld1rq* forms (ssz == 0), 32 for the ld1ro* forms (ssz == 1).
  const int scale = static_cast<int>(16 << ssz);

  // A signed 4-bit step count spans [-8, 7], hence these byte-offset limits.
  [[maybe_unused]] const int upper_bound = 7 * scale;
  [[maybe_unused]] const int lower_bound = -8 * scale;

  LOGMAN_THROW_AA_FMT((imm % scale) == 0, "imm ({}) must be a multiple of {}", imm, scale);
  LOGMAN_THROW_AA_FMT(imm >= lower_bound && imm <= upper_bound, "imm ({}) must be within [{}, {}]",
                      imm, lower_bound, upper_bound);

  // Masking the converted quotient keeps only the low four two's-complement bits.
  const uint32_t imm4 = static_cast<uint32_t>(imm / scale) & 0b1111;

  constexpr uint32_t Op = 0b1010'0100'0000'0000'0010'0000'0000'0000;
  const uint32_t Instr = Op
                       | (msz << 23)
                       | (ssz << 21)
                       | (imm4 << 16)
                       | (pg.Idx() << 10)
                       | (rn.Idx() << 5)
                       | zt.Idx();
  dc32(Instr);
}
void SVELoadAndBroadcastElement(bool is_signed, SubRegSize esize, SubRegSize msize,
ZRegister zt, PRegister pg, Register rn, uint32_t imm) {
LOGMAN_THROW_AA_FMT(esize != SubRegSize::i128Bit, "Cannot use 128-bit elements.");

View File

@ -4169,6 +4169,41 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE load multiple structures (
TEST_SINGLE(ld4d(ZReg::z31, ZReg::z0, ZReg::z1, ZReg::z2, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld4d {z31.d, z0.d, z1.d, z2.d}, p6/z, [x29, x30, lsl #3]");
TEST_SINGLE(ld4d(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld4d {z26.d, z27.d, z28.d, z29.d}, p6/z, [x29, x30, lsl #3]");
}
// Disassembly checks for the ld1rq{b,h,w,d} / ld1ro{b,h,w,d} emitters.
// Each mnemonic is exercised with three offsets: zero (expected to be omitted from the
// disassembly), the most negative value tested, and the most positive value tested.
// The ld1rq* forms use -128 and 112; the ld1ro* forms use -256 and 224.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE load and broadcast quadword (scalar plus immediate)") {
// Byte elements (.b)
TEST_SINGLE(ld1rqb(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1rqb {z30.b}, p6/z, [x29]");
TEST_SINGLE(ld1rqb(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, -128), "ld1rqb {z30.b}, p6/z, [x29, #-128]");
TEST_SINGLE(ld1rqb(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 112), "ld1rqb {z30.b}, p6/z, [x29, #112]");
TEST_SINGLE(ld1rob(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1rob {z30.b}, p6/z, [x29]");
TEST_SINGLE(ld1rob(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, -256), "ld1rob {z30.b}, p6/z, [x29, #-256]");
TEST_SINGLE(ld1rob(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 224), "ld1rob {z30.b}, p6/z, [x29, #224]");
// Halfword elements (.h)
TEST_SINGLE(ld1rqh(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1rqh {z30.h}, p6/z, [x29]");
TEST_SINGLE(ld1rqh(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, -128), "ld1rqh {z30.h}, p6/z, [x29, #-128]");
TEST_SINGLE(ld1rqh(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 112), "ld1rqh {z30.h}, p6/z, [x29, #112]");
TEST_SINGLE(ld1roh(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1roh {z30.h}, p6/z, [x29]");
TEST_SINGLE(ld1roh(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, -256), "ld1roh {z30.h}, p6/z, [x29, #-256]");
TEST_SINGLE(ld1roh(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 224), "ld1roh {z30.h}, p6/z, [x29, #224]");
// Word elements (.s)
TEST_SINGLE(ld1rqw(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1rqw {z30.s}, p6/z, [x29]");
TEST_SINGLE(ld1rqw(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, -128), "ld1rqw {z30.s}, p6/z, [x29, #-128]");
TEST_SINGLE(ld1rqw(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 112), "ld1rqw {z30.s}, p6/z, [x29, #112]");
TEST_SINGLE(ld1row(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1row {z30.s}, p6/z, [x29]");
TEST_SINGLE(ld1row(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, -256), "ld1row {z30.s}, p6/z, [x29, #-256]");
TEST_SINGLE(ld1row(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 224), "ld1row {z30.s}, p6/z, [x29, #224]");
// Doubleword elements (.d)
TEST_SINGLE(ld1rqd(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1rqd {z30.d}, p6/z, [x29]");
TEST_SINGLE(ld1rqd(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, -128), "ld1rqd {z30.d}, p6/z, [x29, #-128]");
TEST_SINGLE(ld1rqd(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 112), "ld1rqd {z30.d}, p6/z, [x29, #112]");
TEST_SINGLE(ld1rod(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1rod {z30.d}, p6/z, [x29]");
TEST_SINGLE(ld1rod(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, -256), "ld1rod {z30.d}, p6/z, [x29, #-256]");
TEST_SINGLE(ld1rod(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 224), "ld1rod {z30.d}, p6/z, [x29, #224]");
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE load multiple structures (scalar plus immediate)") {
TEST_SINGLE(ld2b(ZReg::z31, ZReg::z0, PReg::p6.Zeroing(), Reg::r29, 0), "ld2b {z31.b, z0.b}, p6/z, [x29]");
TEST_SINGLE(ld2b(ZReg::z26, ZReg::z27, PReg::p6.Zeroing(), Reg::r29, 0), "ld2b {z26.b, z27.b}, p6/z, [x29]");