CodeEmitter: Implement support for SVE NT loads

This commit is contained in:
Ryan Houdek 2024-07-10 23:06:19 -07:00
parent 5fe405e1fb
commit c9efb75714
No known key found for this signature in database
2 changed files with 47 additions and 1 deletions

View File

@ -2569,7 +2569,19 @@ public:
}
// SVE contiguous non-temporal load (scalar plus immediate)
// XXX:
// LDNT1B, scalar plus immediate form.
// zt: destination vector register, pg: governing predicate, rn: base register,
// Imm: signed immediate offset (defaults to 0); the shared encoder asserts it lies in [-8, 7].
void ldnt1b(ZRegister zt, PRegister pg, Register rn, int32_t Imm = 0) {
  // msz field value handed to the shared encoder for the byte variant.
  constexpr uint32_t Msz = 0b00;
  SVEContiguousNontemporalLoad(Msz, zt, pg, rn, Imm);
}
// LDNT1H, scalar plus immediate form.
// zt: destination vector register, pg: governing predicate, rn: base register,
// Imm: signed immediate offset (defaults to 0); the shared encoder asserts it lies in [-8, 7].
void ldnt1h(ZRegister zt, PRegister pg, Register rn, int32_t Imm = 0) {
  // msz field value handed to the shared encoder for the halfword variant.
  constexpr uint32_t Msz = 0b01;
  SVEContiguousNontemporalLoad(Msz, zt, pg, rn, Imm);
}
// LDNT1W, scalar plus immediate form.
// zt: destination vector register, pg: governing predicate, rn: base register,
// Imm: signed immediate offset (defaults to 0); the shared encoder asserts it lies in [-8, 7].
void ldnt1w(ZRegister zt, PRegister pg, Register rn, int32_t Imm = 0) {
  // msz field value handed to the shared encoder for the word variant.
  constexpr uint32_t Msz = 0b10;
  SVEContiguousNontemporalLoad(Msz, zt, pg, rn, Imm);
}
// LDNT1D, scalar plus immediate form.
// zt: destination vector register, pg: governing predicate, rn: base register,
// Imm: signed immediate offset (defaults to 0); the shared encoder asserts it lies in [-8, 7].
void ldnt1d(ZRegister zt, PRegister pg, Register rn, int32_t Imm = 0) {
  // msz field value handed to the shared encoder for the doubleword variant.
  constexpr uint32_t Msz = 0b11;
  SVEContiguousNontemporalLoad(Msz, zt, pg, rn, Imm);
}
// SVE contiguous non-temporal load (scalar plus scalar)
// XXX:
// SVE load multiple structures (scalar plus immediate)
@ -4492,6 +4504,22 @@ private:
dc32(Instr);
}
// SVE contiguous non-temporal load (scalar plus immediate)
// Shared encoder for the SVE contiguous non-temporal loads (scalar plus immediate).
//   msz - value placed at bit 23 upward; callers pass 0b00..0b11 to pick the b/h/w/d variant.
//   zt  - destination vector register, encoded in the low bits.
//   pg  - governing predicate; asserted to be one of p0-p7, encoded from bit 10.
//   rn  - base register, encoded through Encode_rn().
//   imm - signed offset; asserted to be within [-8, 7] and stored as a 4-bit field from bit 16.
// Emits exactly one 32-bit instruction word through dc32().
void SVEContiguousNontemporalLoad(uint32_t msz, ZRegister zt, PRegister pg, Register rn, int32_t imm) {
  LOGMAN_THROW_A_FMT(pg <= PReg::p7, "Can only use p0-p7 as a governing predicate");
  LOGMAN_THROW_AA_FMT(imm >= -8 && imm <= 7, "Invalid loadstore offset ({}). Must be between [-8, 7]", imm);

  // Fixed opcode bits shared by every variant of this instruction class.
  constexpr uint32_t BaseOpcode = 0b1010'0100'0000'0000'1110'0000'0000'0000;

  // Build each operand field separately, then merge them into the opcode.
  const uint32_t SizeField = msz << 23;
  // Masking to four bits keeps the two's-complement representation of negative offsets.
  const uint32_t OffsetField = (static_cast<uint32_t>(imm) & 0xF) << 16;
  const uint32_t PredicateField = pg.Idx() << 10;
  const uint32_t BaseRegField = Encode_rn(rn);
  const uint32_t DestField = zt.Idx();

  dc32(BaseOpcode | SizeField | OffsetField | PredicateField | BaseRegField | DestField);
}
// SVE contiguous non-temporal store (scalar plus immediate)
void SVEContiguousNontemporalStore(uint32_t msz, ZRegister zt, PRegister pg, Register rn, int32_t imm) {
LOGMAN_THROW_A_FMT(pg <= PReg::p7, "Can only use p0-p7 as a governing predicate");

View File

@ -3982,6 +3982,24 @@ TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE load and broadcast element
TEST_SINGLE(ld1rd(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 504), "ld1rd {z30.d}, p6/z, [x29, #504]");
}
// Disassembly round-trip checks for the ldnt1b/ldnt1h/ldnt1w/ldnt1d emitters
// (scalar plus immediate form). Each variant is exercised at offset 0 and at the
// two boundary immediates, -8 and 7.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE contiguous non-temporal load (scalar plus immediate)") {
// Byte elements; a zero offset is expected to disassemble without an immediate.
TEST_SINGLE(ldnt1b(ZReg::z31, PReg::p6, Reg::r29, 0), "ldnt1b {z31.b}, p6/z, [x29]");
TEST_SINGLE(ldnt1b(ZReg::z31, PReg::p6, Reg::r29, -8), "ldnt1b {z31.b}, p6/z, [x29, #-8, mul vl]");
TEST_SINGLE(ldnt1b(ZReg::z31, PReg::p6, Reg::r29, 7), "ldnt1b {z31.b}, p6/z, [x29, #7, mul vl]");
// Halfword elements.
TEST_SINGLE(ldnt1h(ZReg::z31, PReg::p6, Reg::r29, 0), "ldnt1h {z31.h}, p6/z, [x29]");
TEST_SINGLE(ldnt1h(ZReg::z31, PReg::p6, Reg::r29, -8), "ldnt1h {z31.h}, p6/z, [x29, #-8, mul vl]");
TEST_SINGLE(ldnt1h(ZReg::z31, PReg::p6, Reg::r29, 7), "ldnt1h {z31.h}, p6/z, [x29, #7, mul vl]");
// Word elements (disassembled with the .s suffix).
TEST_SINGLE(ldnt1w(ZReg::z31, PReg::p6, Reg::r29, 0), "ldnt1w {z31.s}, p6/z, [x29]");
TEST_SINGLE(ldnt1w(ZReg::z31, PReg::p6, Reg::r29, -8), "ldnt1w {z31.s}, p6/z, [x29, #-8, mul vl]");
TEST_SINGLE(ldnt1w(ZReg::z31, PReg::p6, Reg::r29, 7), "ldnt1w {z31.s}, p6/z, [x29, #7, mul vl]");
// Doubleword elements.
TEST_SINGLE(ldnt1d(ZReg::z31, PReg::p6, Reg::r29, 0), "ldnt1d {z31.d}, p6/z, [x29]");
TEST_SINGLE(ldnt1d(ZReg::z31, PReg::p6, Reg::r29, -8), "ldnt1d {z31.d}, p6/z, [x29, #-8, mul vl]");
TEST_SINGLE(ldnt1d(ZReg::z31, PReg::p6, Reg::r29, 7), "ldnt1d {z31.d}, p6/z, [x29, #7, mul vl]");
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE load multiple structures (scalar plus scalar)") {
TEST_SINGLE(ld2b(ZReg::z31, ZReg::z0, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld2b {z31.b, z0.b}, p6/z, [x29, x30]");
TEST_SINGLE(ld2b(ZReg::z26, ZReg::z27, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld2b {z26.b, z27.b}, p6/z, [x29, x30]");