[lldb][AArch64] Add SME2's ZT0 register (#70205)

SME2 is documented as part of the main SME supplement:
https://developer.arm.com/documentation/ddi0616/latest/

The one change for debug is this new ZT0 register. This register
contains data to be used with new table lookup instructions.
Its size is always 512 bits (not scalable) and can be
interpreted in many different ways depending on the instructions
that use it. 

The kernel has implemented this as a new register set containing
this single register. It always returns register data (with no header,
unlike ZA which does have a header).

https://docs.kernel.org/arch/arm64/sme.html

ZT0 is only active when ZA is active (when SVCR.ZA is 1). In the 
inactive state the kernel returns 0s for its contents. Therefore
lldb doesn't need to create 0s like it does for ZA. 

However, we will skip restoring the value of ZT0 if we know that
ZA is inactive, because writing to an inactive ZT0 sets SVCR.ZA to 1,
which is not desirable as it would also activate ZA. Whether
SVCR.ZA is set will be determined only by the ZA data we restore.

Due to this, I've added a new save/restore kind SME2. This is easier
than accounting for the variable length ZA in the SME data. We'll only
save an SME2 data block if ZA is active. If it's not we can get fresh
0s back from the kernel for ZT0 anyway so there's nothing for us to
restore.

This new register will only show up if the system has SME2; therefore
the SME set presented to the user may change, and I've had to account
for that in a few places.

I've referred to it internally as simply "ZT" as the kernel does in
NT_ARM_ZT, but the architecture refers to the specific register as "ZT0"
so that's what you'll see in lldb.

```
(lldb) register read -s 6
Scalable Matrix Extension Registers:
      svcr = 0x0000000000000000
       svg = 0x0000000000000004
        za = {0x00 <...> 0x00}
       zt0 = {0x00 <...> 0x00}
```
This commit is contained in:
David Spickett 2023-11-01 10:40:25 +00:00 committed by GitHub
parent aaba3761db
commit b8150c8f12
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 314 additions and 50 deletions

View File

@ -1271,6 +1271,10 @@ class Base(unittest2.TestCase):
def isAArch64SME(self):
return self.isAArch64() and "sme" in self.getCPUInfo()
def isAArch64SME2(self):
    """Report whether this is an AArch64 target whose CPU info lists "sme2"."""
    # A CPU that advertises sme2 also implements the base sme feature.
    on_aarch64 = self.isAArch64()
    return on_aarch64 and "sme2" in self.getCPUInfo()
def isAArch64SMEFA64(self):
# smefa64 allows the use of the full A64 instruction set in streaming
# mode. This is required by certain test programs to setup register

View File

@ -45,6 +45,11 @@
#define NT_ARM_ZA 0x40c /* ARM Scalable Matrix Extension, Array Storage */
#endif
#ifndef NT_ARM_ZT
#define NT_ARM_ZT \
0x40d /* ARM Scalable Matrix Extension 2, lookup table register */
#endif
#ifndef NT_ARM_PAC_MASK
#define NT_ARM_PAC_MASK 0x406 /* Pointer authentication code masks */
#endif
@ -104,6 +109,17 @@ NativeRegisterContextLinux::CreateHostNativeRegisterContextLinux(
.Success())
opt_regsets.Set(RegisterInfoPOSIX_arm64::eRegsetMaskZA);
// SME's ZT0 is a 512 bit register.
std::array<uint8_t, 64> zt_reg;
ioVec.iov_base = zt_reg.data();
ioVec.iov_len = zt_reg.size();
regset = NT_ARM_ZT;
if (NativeProcessLinux::PtraceWrapper(PTRACE_GETREGSET,
native_thread.GetID(), &regset,
&ioVec, zt_reg.size())
.Success())
opt_regsets.Set(RegisterInfoPOSIX_arm64::eRegsetMaskZT);
NativeProcessLinux &process = native_thread.GetProcess();
std::optional<uint64_t> auxv_at_hwcap =
@ -148,6 +164,7 @@ NativeRegisterContextLinux_arm64::NativeRegisterContextLinux_arm64(
::memset(&m_pac_mask, 0, sizeof(m_pac_mask));
::memset(&m_tls_regs, 0, sizeof(m_tls_regs));
::memset(&m_sme_pseudo_regs, 0, sizeof(m_sme_pseudo_regs));
std::fill(m_zt_reg.begin(), m_zt_reg.end(), 0);
m_mte_ctrl_reg = 0;
@ -164,6 +181,7 @@ NativeRegisterContextLinux_arm64::NativeRegisterContextLinux_arm64(
m_pac_mask_is_valid = false;
m_mte_ctrl_is_valid = false;
m_tls_is_valid = false;
m_zt_buffer_is_valid = false;
// SME adds the tpidr2 register
m_tls_size = GetRegisterInfo().IsSSVEPresent() ? sizeof(m_tls_regs)
@ -355,6 +373,15 @@ NativeRegisterContextLinux_arm64::ReadRegister(const RegisterInfo *reg_info,
// storage. Therefore its effective byte offset is always 0 even if it
// isn't 0 within the SME register set.
src = (uint8_t *)GetZABuffer() + GetZAHeaderSize();
} else if (GetRegisterInfo().IsSMERegZT(reg)) {
// Unlike ZA, the kernel will return register data for ZT0 when ZA is not
// enabled. This data will be all 0s so we don't have to invent anything
// like we did for ZA.
error = ReadZT();
if (error.Fail())
return error;
src = (uint8_t *)GetZTBuffer();
} else {
error = ReadSMESVG();
if (error.Fail())
@ -552,22 +579,31 @@ Status NativeRegisterContextLinux_arm64::WriteRegister(
return WriteTLS();
} else if (IsSME(reg)) {
if (!GetRegisterInfo().IsSMERegZA(reg))
if (GetRegisterInfo().IsSMERegZA(reg)) {
error = ReadZA();
if (error.Fail())
return error;
// ZA is part of the SME set but not stored with the other SME registers.
// So its byte offset is effectively always 0.
dst = (uint8_t *)GetZABuffer() + GetZAHeaderSize();
::memcpy(dst, reg_value.GetBytes(), reg_info->byte_size);
// While this is writing a header that contains a vector length, the only
// way to change that is via the vg register. So here we assume the length
// will always be the current length and no reconfigure is needed.
return WriteZA();
} else if (GetRegisterInfo().IsSMERegZT(reg)) {
error = ReadZT();
if (error.Fail())
return error;
dst = (uint8_t *)GetZTBuffer();
::memcpy(dst, reg_value.GetBytes(), reg_info->byte_size);
return WriteZT();
} else
return Status("Writing to SVG or SVCR is not supported.");
error = ReadZA();
if (error.Fail())
return error;
// ZA is part of the SME set but not stored with the other SME registers.
// So its byte offset is effectively always 0.
dst = (uint8_t *)GetZABuffer() + GetZAHeaderSize();
::memcpy(dst, reg_value.GetBytes(), reg_info->byte_size);
// While this is writing a header that contains a vector length, the only
// way to change that is via the vg register. So here we assume the length
// will always be the current length and no reconfigure is needed.
return WriteZA();
}
return Status("Failed to write register value");
@ -580,7 +616,8 @@ enum RegisterSetType : uint32_t {
// Pointer authentication registers are read only, so not included here.
MTE,
TLS,
SME, // ZA only, SVCR and SVG are pseudo registers.
SME, // ZA only, because SVCR and SVG are pseudo registers.
SME2, // ZT only.
};
static uint8_t *AddRegisterSetType(uint8_t *dst,
@ -624,6 +661,21 @@ NativeRegisterContextLinux_arm64::CacheAllRegisters(uint32_t &cached_size) {
error = ReadZA();
if (error.Fail())
return error;
// We will only be restoring ZT data if ZA is active. As writing to an
// inactive ZT enables ZA, which may not be desireable.
if (
// If we have ZT0, or in other words, if we have SME2.
GetRegisterInfo().IsZTPresent() &&
// And ZA is active, which means that ZT0 is also active.
m_za_header.size > sizeof(m_za_header)) {
cached_size += sizeof(RegisterSetType) + GetZTBufferSize();
// The kernel handles an inactive ZT0 for us, and it will read as 0s if
// inactive (unlike ZA where we fake that behaviour).
error = ReadZT();
if (error.Fail())
return error;
}
}
// If SVE is enabled we need not copy FPR separately.
@ -731,6 +783,19 @@ Status NativeRegisterContextLinux_arm64::ReadAllRegisterValues(
m_za_header.size);
}
// If ZT0 is present and we are going to be restoring an active ZA (which
// implies an active ZT0), then restore ZT0 after ZA has been set. This
// prevents us enabling ZA accidentally after the restore of ZA disabled it.
// If we leave ZA/ZT0 inactive and read ZT0, the kernel returns 0s. Therefore
// there's nothing for us to restore if ZA was originally inactive.
if (
// If we have SME2 and therefore ZT0.
GetRegisterInfo().IsZTPresent() &&
// And ZA is enabled.
m_za_header.size > sizeof(m_za_header))
dst = AddSavedRegisters(dst, RegisterSetType::SME2, GetZTBuffer(),
GetZTBufferSize());
if (GetRegisterInfo().IsMTEPresent()) {
dst = AddSavedRegisters(dst, RegisterSetType::MTE, GetMTEControl(),
GetMTEControlSize());
@ -874,6 +939,14 @@ Status NativeRegisterContextLinux_arm64::WriteAllRegisterValues(
error = ReadZA();
src += GetZABufferSize();
break;
case RegisterSetType::SME2:
// Doing this would activate an inactive ZA, however we will only get here
// if the state we are restoring had an active ZA. Restoring ZT0 will
// always come after restoring ZA.
error = RestoreRegisters(
GetZTBuffer(), &src, GetZTBufferSize(), m_zt_buffer_is_valid,
std::bind(&NativeRegisterContextLinux_arm64::WriteZT, this));
break;
}
if (error.Fail())
@ -1063,6 +1136,7 @@ void NativeRegisterContextLinux_arm64::InvalidateAllRegisters() {
m_pac_mask_is_valid = false;
m_mte_ctrl_is_valid = false;
m_tls_is_valid = false;
m_zt_buffer_is_valid = false;
// Update SVE and ZA registers in case there is change in configuration.
ConfigureRegisterContext();
@ -1300,10 +1374,48 @@ Status NativeRegisterContextLinux_arm64::WriteZA() {
m_za_buffer_is_valid = false;
m_za_header_is_valid = false;
// Writing to ZA may enable ZA, which means ZT0 may change too.
m_zt_buffer_is_valid = false;
return WriteRegisterSet(&ioVec, GetZABufferSize(), NT_ARM_ZA);
}
// Fill the ZT0 buffer from the NT_ARM_ZT register set, unless the cached copy
// is already marked valid. Returns the status of the read (success when the
// cache was used).
Status NativeRegisterContextLinux_arm64::ReadZT() {
  // Nothing to fetch while the cached contents are still trusted.
  if (m_zt_buffer_is_valid)
    return Status();

  struct iovec zt_iov;
  zt_iov.iov_base = GetZTBuffer();
  zt_iov.iov_len = GetZTBufferSize();

  Status read_status = ReadRegisterSet(&zt_iov, GetZTBufferSize(), NT_ARM_ZT);
  // Only a successful read leaves usable data in the buffer.
  m_zt_buffer_is_valid = read_status.Success();
  return read_status;
}
// Send the contents of the ZT0 buffer to the NT_ARM_ZT register set.
// Returns the read error if the buffer could not be populated first,
// otherwise the status of the write.
Status NativeRegisterContextLinux_arm64::WriteZT() {
  // Make sure the buffer has been populated; ReadZT returns immediately when
  // the cached copy is already valid.
  Status read_status = ReadZT();
  if (read_status.Fail())
    return read_status;

  // The cached ZT0 copy is dropped before the write is attempted.
  m_zt_buffer_is_valid = false;
  // Writing to an inactive ZT0 will enable ZA as well, which invalidates our
  // current copy of it.
  m_za_header_is_valid = false;
  m_za_buffer_is_valid = false;

  struct iovec zt_iov;
  zt_iov.iov_base = GetZTBuffer();
  zt_iov.iov_len = GetZTBufferSize();
  return WriteRegisterSet(&zt_iov, GetZTBufferSize(), NT_ARM_ZT);
}
void NativeRegisterContextLinux_arm64::ConfigureRegisterContext() {
// ConfigureRegisterContext gets called from InvalidateAllRegisters
// on every stop and configures SVE vector length and whether we are in

View File

@ -83,6 +83,7 @@ private:
bool m_fpu_is_valid;
bool m_sve_buffer_is_valid;
bool m_mte_ctrl_is_valid;
bool m_zt_buffer_is_valid;
bool m_sve_header_is_valid;
bool m_za_buffer_is_valid;
@ -129,6 +130,9 @@ private:
struct tls_regs m_tls_regs;
// SME2's ZT is a 512 bit register.
std::array<uint8_t, 64> m_zt_reg;
bool IsGPR(unsigned reg) const;
bool IsFPR(unsigned reg) const;
@ -163,6 +167,10 @@ private:
// Instead use WriteZA and ensure you have the correct ZA buffer size set
// beforehand if you wish to disable it.
Status ReadZT();
Status WriteZT();
// SVCR is a pseudo register and we do not allow writes to it.
Status ReadSMEControl();
@ -190,6 +198,8 @@ private:
void *GetSMEPseudoBuffer() { return &m_sme_pseudo_regs; }
void *GetZTBuffer() { return m_zt_reg.data(); }
void *GetSVEBuffer() { return m_sve_ptrace_payload.data(); }
size_t GetSVEHeaderSize() { return sizeof(m_sve_header); }
@ -210,6 +220,8 @@ private:
size_t GetSMEPseudoBufferSize() { return sizeof(m_sme_pseudo_regs); }
size_t GetZTBufferSize() { return m_zt_reg.size(); }
llvm::Error ReadHardwareDebugInfo() override;
llvm::Error WriteHardwareDebugRegs(DREGType hwbType) override;

View File

@ -90,6 +90,10 @@ static lldb_private::RegisterInfo g_register_infos_sme[] = {
{"za", nullptr, 16, 0, lldb::eEncodingVector, lldb::eFormatVectorOfUInt8,
KIND_ALL_INVALID, nullptr, nullptr, nullptr}};
// Register info for the single SME2 register, "zt0", displayed as a vector of
// uint8 values. The 64 is presumably the byte size (ZT0 is 512 bits) and the
// following 0 a placeholder byte offset, which AddRegSetSME overwrites when it
// appends this entry to the dynamic register list.
static lldb_private::RegisterInfo g_register_infos_sme2[] = {
{"zt0", nullptr, 64, 0, lldb::eEncodingVector, lldb::eFormatVectorOfUInt8,
KIND_ALL_INVALID, nullptr, nullptr, nullptr}};
// Number of register sets provided by this context.
enum {
k_num_gpr_registers = gpr_w28 - gpr_x0 + 1,
@ -98,6 +102,8 @@ enum {
k_num_mte_register = 1,
// Number of TLS registers is dynamic so it is not listed here.
k_num_pauth_register = 2,
// SME2's ZT0 will also be added to this set if present. So this number is
// only for SME1 registers.
k_num_sme_register = 3,
k_num_register_sets_default = 2,
k_num_register_sets = 3
@ -253,7 +259,7 @@ RegisterInfoPOSIX_arm64::RegisterInfoPOSIX_arm64(
AddRegSetTLS(m_opt_regsets.AllSet(eRegsetMaskSSVE));
if (m_opt_regsets.AnySet(eRegsetMaskSSVE))
AddRegSetSME();
AddRegSetSME(m_opt_regsets.AnySet(eRegsetMaskZT));
m_register_info_count = m_dynamic_reg_infos.size();
m_register_info_p = m_dynamic_reg_infos.data();
@ -358,21 +364,35 @@ void RegisterInfoPOSIX_arm64::AddRegSetTLS(bool has_tpidr2) {
m_dynamic_reg_sets.back().registers = m_tls_regnum_collection.data();
}
void RegisterInfoPOSIX_arm64::AddRegSetSME() {
uint32_t sme_regnum = m_dynamic_reg_infos.size();
for (uint32_t i = 0; i < k_num_sme_register; i++) {
m_sme_regnum_collection.push_back(sme_regnum + i);
void RegisterInfoPOSIX_arm64::AddRegSetSME(bool has_zt) {
const uint32_t first_sme_regnum = m_dynamic_reg_infos.size();
uint32_t sme_regnum = first_sme_regnum;
for (uint32_t i = 0; i < k_num_sme_register; ++i, ++sme_regnum) {
m_sme_regnum_collection.push_back(sme_regnum);
m_dynamic_reg_infos.push_back(g_register_infos_sme[i]);
m_dynamic_reg_infos[sme_regnum + i].byte_offset =
m_dynamic_reg_infos[sme_regnum + i - 1].byte_offset +
m_dynamic_reg_infos[sme_regnum + i - 1].byte_size;
m_dynamic_reg_infos[sme_regnum + i].kinds[lldb::eRegisterKindLLDB] =
sme_regnum + i;
m_dynamic_reg_infos[sme_regnum].byte_offset =
m_dynamic_reg_infos[sme_regnum - 1].byte_offset +
m_dynamic_reg_infos[sme_regnum - 1].byte_size;
m_dynamic_reg_infos[sme_regnum].kinds[lldb::eRegisterKindLLDB] = sme_regnum;
}
lldb_private::RegisterSet sme_regset = g_reg_set_sme_arm64;
if (has_zt) {
m_sme_regnum_collection.push_back(sme_regnum);
m_dynamic_reg_infos.push_back(g_register_infos_sme2[0]);
m_dynamic_reg_infos[sme_regnum].byte_offset =
m_dynamic_reg_infos[sme_regnum - 1].byte_offset +
m_dynamic_reg_infos[sme_regnum - 1].byte_size;
m_dynamic_reg_infos[sme_regnum].kinds[lldb::eRegisterKindLLDB] = sme_regnum;
sme_regset.num_registers += 1;
}
m_per_regset_regnum_range[m_register_set_count] =
std::make_pair(sme_regnum, m_dynamic_reg_infos.size());
m_dynamic_reg_sets.push_back(g_reg_set_sme_arm64);
std::make_pair(first_sme_regnum, m_dynamic_reg_infos.size());
m_dynamic_reg_sets.push_back(sme_regset);
m_dynamic_reg_sets.back().registers = m_sme_regnum_collection.data();
// When vg is written during streaming mode, svg will also change, as vg and
@ -488,6 +508,12 @@ bool RegisterInfoPOSIX_arm64::IsSMERegZA(unsigned reg) const {
return reg == m_sme_regnum_collection[2];
}
// Returns true if reg is the register number recorded for ZT0.
bool RegisterInfoPOSIX_arm64::IsSMERegZT(unsigned reg) const {
  // ZT0 is part of the SME register set only if SME2 is present, in which
  // case it is the fourth entry of the collection.
  if (m_sme_regnum_collection.size() < 4)
    return false;

  return m_sme_regnum_collection[3] == reg;
}
// Returns true if reg appears in the pointer authentication register number
// collection.
bool RegisterInfoPOSIX_arm64::IsPAuthReg(unsigned reg) const {
  const bool found_in_pauth_set =
      llvm::is_contained(pauth_regnum_collection, reg);
  return found_in_pauth_set;
}

View File

@ -31,6 +31,7 @@ public:
eRegsetMaskMTE = 8,
eRegsetMaskTLS = 16,
eRegsetMaskZA = 32,
eRegsetMaskZT = 64,
eRegsetMaskDynamic = ~1,
};
@ -107,7 +108,7 @@ public:
void AddRegSetTLS(bool has_tpidr2);
void AddRegSetSME();
void AddRegSetSME(bool has_zt);
uint32_t ConfigureVectorLengthSVE(uint32_t sve_vq);
@ -123,6 +124,7 @@ public:
bool IsSVEPresent() const { return m_opt_regsets.AnySet(eRegsetMaskSVE); }
bool IsSSVEPresent() const { return m_opt_regsets.AnySet(eRegsetMaskSSVE); }
bool IsZAPresent() const { return m_opt_regsets.AnySet(eRegsetMaskZA); }
bool IsZTPresent() const { return m_opt_regsets.AnySet(eRegsetMaskZT); }
bool IsPAuthPresent() const { return m_opt_regsets.AnySet(eRegsetMaskPAuth); }
bool IsMTEPresent() const { return m_opt_regsets.AnySet(eRegsetMaskMTE); }
bool IsTLSPresent() const { return m_opt_regsets.AnySet(eRegsetMaskTLS); }
@ -136,6 +138,7 @@ public:
bool IsTLSReg(unsigned reg) const;
bool IsSMEReg(unsigned reg) const;
bool IsSMERegZA(unsigned reg) const;
bool IsSMERegZT(unsigned reg) const;
uint32_t GetRegNumSVEZ0() const;
uint32_t GetRegNumSVEFFR() const;

View File

@ -127,6 +127,10 @@ constexpr RegsetDesc AARCH64_ZA_Desc[] = {
{llvm::Triple::Linux, llvm::Triple::aarch64, llvm::ELF::NT_ARM_ZA},
};
// Register set descriptor for SME2's ZT0: Linux on AArch64, note type
// NT_ARM_ZT.
constexpr RegsetDesc AARCH64_ZT_Desc[] = {
{llvm::Triple::Linux, llvm::Triple::aarch64, llvm::ELF::NT_ARM_ZT},
};
constexpr RegsetDesc AARCH64_PAC_Desc[] = {
{llvm::Triple::Linux, llvm::Triple::aarch64, llvm::ELF::NT_ARM_PAC_MASK},
};

View File

@ -136,6 +136,14 @@ class RegisterCommandsTestCase(TestBase):
rows.append(" ".join([byte] * vl))
return "{" + " ".join(rows) + "}"
def make_zt0_value(self, generator):
    """Build the "{0x.. 0x.. ...}" text for ZT0, where byte i is generator(i)."""
    # ZT0 is 512 bits, so the value always has 64 byte elements.
    byte_count = 512 // 8
    formatted = ["0x{:02x}".format(generator(index)) for index in range(byte_count)]
    return "{" + " ".join(formatted) + "}"
@no_debug_info_test
@skipIf(archs=no_match(["aarch64"]))
@skipIf(oslist=no_match(["linux"]))
@ -179,16 +187,11 @@ class RegisterCommandsTestCase(TestBase):
# SVCR is read only so we do not test writing to it.
@no_debug_info_test
@skipIf(archs=no_match(["aarch64"]))
@skipIf(oslist=no_match(["linux"]))
def test_aarch64_dynamic_regset_config_sme_za_disabled(self):
"""Test that ZA shows as 0s when disabled and can be enabled by writing
to it."""
if not self.isAArch64SME():
self.skipTest("SME must be present.")
def write_to_enable_za_test(self, has_zt0, write_za_first):
# Run a test where we start with ZA disabled, and write to either ZA
# or ZT0 which causes them to become enabled.
# No argument, so ZA will be disabled when we break.
# No argument, so ZA and ZT0 will be disabled when we break.
register_sets = self.setup_register_config_test()
# vg is the non-streaming vg as we are in non-streaming mode, so we need
@ -205,14 +208,71 @@ class RegisterCommandsTestCase(TestBase):
svl = svg * 8
# A disabled ZA is shown as all 0s.
self.expect("register read za", substrs=[self.make_za_value(svl, lambda r: 0)])
disabled_za = self.make_za_value(svl, lambda r: 0)
self.expect("register read za", substrs=[disabled_za])
disabled_zt0 = self.make_zt0_value(lambda n: 0)
if has_zt0:
# A disabled zt0 is all 0s.
self.expect("register read zt0", substrs=[disabled_zt0])
# Writing to ZA or ZTO enables both and we should be able to read the
# value back.
za_value = self.make_za_value(svl, lambda r: r + 1)
# Writing to it enables ZA, so the value should be there when we read
# it back.
self.runCmd("register write za '{}'".format(za_value))
self.expect("register read za", substrs=[za_value])
zt0_value = self.make_zt0_value(lambda n: n + 1)
if write_za_first:
# This enables ZA and ZT0.
self.runCmd("register write za '{}'".format(za_value))
self.expect("register read za", substrs=[za_value])
if has_zt0:
# ZT0 is still 0s at this point, though it is active.
self.expect("register read zt0", substrs=[disabled_zt0])
# Now write ZT0 to we can check it reads back correctly.
self.runCmd("register write zt0 '{}'".format(zt0_value))
self.expect("register read zt0", substrs=[zt0_value])
else:
if not has_zt0:
self.fail("Cannot write to zt0 when sme2 is not present.")
# Instead use the write of ZT0 to activate ZA.
self.runCmd("register write zt0 '{}'".format(zt0_value))
self.expect("register read zt0", substrs=[zt0_value])
# ZA will be active but 0s at this point, but it is active.
self.expect("register read zt0", substrs=[disabled_za])
# Write and read back ZA.
self.runCmd("register write za '{}'".format(za_value))
self.expect("register read za", substrs=[za_value])
# Now SVCR.ZA should be set, which is bit 1.
self.expect("register read svcr", substrs=["0x0000000000000002"])
# SVCR is read only so we do not test writing to it.
@no_debug_info_test
@skipIf(archs=no_match(["aarch64"]))
@skipIf(oslist=no_match(["linux"]))
def test_aarch64_dynamic_regset_config_sme_write_za_to_enable(self):
    """Test that ZA and ZT0 (if present) shows as 0s when disabled and
    can be enabled by writing to ZA."""
    sme_present = self.isAArch64SME()
    if not sme_present:
        self.skipTest("SME must be present.")

    # ZT0 is only checked when the target also reports SME2; ZA is the
    # register written first.
    zt0_present = self.isAArch64SME2()
    self.write_to_enable_za_test(zt0_present, True)
@no_debug_info_test
@skipIf(archs=no_match(["aarch64"]))
@skipIf(oslist=no_match(["linux"]))
def test_aarch64_dynamic_regset_config_sme_write_zt0_to_enable(self):
"""Test that ZA and ZT0 (if present) shows as 0s when disabled and
can be enabled by writing to ZT0."""
# This test writes to zt0, so it is skipped unless both SME and SME2 are
# reported.
if not self.isAArch64SME():
self.skipTest("SME must be present.")
if not self.isAArch64SME2():
self.skipTest("SME2 must be present.")
# NOTE(review): the second argument is write_za_first. Passing True makes
# this run the same ZA-first path as the write_za_to_enable test, so the
# helper's ZT0-first branch is never exercised. It presumably should be
# False here - confirm, and check the helper's ZT0-first branch first (it
# reads zt0 where its comment talks about ZA) before flipping it.
self.write_to_enable_za_test(True, True)

View File

@ -1,5 +1,5 @@
C_SOURCES := main.c
CFLAGS_EXTRAS := -march=armv8-a+sve+sme
CFLAGS_EXTRAS := -march=armv8-a+sve+sme+sme2
include Makefile.rules

View File

@ -1,5 +1,6 @@
"""
Test the AArch64 SME ZA register is saved and restored around expressions.
Test the AArch64 SME ZA and ZT0 registers are saved and restored around
expressions.
This attempts to cover expressions that change the following:
* ZA enabled or not.
@ -105,6 +106,21 @@ class AArch64ZATestCase(TestBase):
# When ZA is disabled, lldb will show ZA as all 0s.
self.expect("register read za", substrs=[self.make_za_value(vl, lambda row: 0)])
def make_zt0_value(self, generator):
    """Return ZT0 as lldb prints it, with the byte at index i set to
    generator(i)."""
    # 512 bits of register data, one formatted element per byte.
    total_bytes = 512 // 8
    body = " ".join("0x{:02x}".format(generator(pos)) for pos in range(total_bytes))
    return "{" + body + "}"
def check_zt0(self):
    """Expect "register read zt0" to show byte n holding the value n + 1."""
    expected_zt0 = self.make_zt0_value(lambda n: n + 1)
    self.expect("register read zt0", substrs=[expected_zt0])
def check_zt0_disabled(self):
    """Expect "register read zt0" to show every byte as zero."""
    # Like ZA, zt0 reads as 0 when SVCR.ZA is not set.
    all_zero_zt0 = self.make_zt0_value(lambda n: 0)
    self.expect("register read zt0", substrs=[all_zero_zt0])
def za_expr_test_impl(self, sve_mode, za_state, swap_start_vl):
if not self.isAArch64SMEFA64():
self.skipTest("SME and the smefa64 extension must be present.")
@ -168,11 +184,17 @@ class AArch64ZATestCase(TestBase):
if za_state == ZA.Enabled:
svcr_value += 2
has_zt0 = self.isAArch64SME2()
def check_regs():
if za_state == ZA.Enabled:
self.check_za(start_vl)
if has_zt0:
self.check_zt0()
else:
self.check_za_disabled(start_vl)
if has_zt0:
self.check_zt0_disabled()
# svg and vg are in units of 8 bytes.
self.assertEqual(start_vl, self.read_svg() * 8)

View File

@ -1,7 +1,9 @@
#include <asm/hwcap.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/auxv.h>
#include <sys/prctl.h>
// Important details for this program:
@ -30,8 +32,13 @@
#define SMSTOP_SM SM_INST(2)
#define SMSTOP_ZA SM_INST(4)
#ifndef HWCAP2_SME2
#define HWCAP2_SME2 (1UL << 37)
#endif
int start_vl = 0;
int other_vl = 0;
bool has_zt0 = false;
void write_sve_regs() {
// We assume the smefa64 feature is present, which allows ffr access
@ -143,7 +150,7 @@ void write_sve_regs_expr() {
asm volatile("cpy z31.b, p15/z, #33\n\t");
}
void set_za_register(int svl, int value_offset) {
void set_sme_registers(int svl, uint8_t value_offset) {
#define MAX_VL_BYTES 256
uint8_t data[MAX_VL_BYTES];
@ -158,6 +165,17 @@ void set_za_register(int svl, int value_offset) {
"r"(&data)
: "w12");
}
#undef MAX_VL_BYTES
if (has_zt0) {
  // ZT0 is a fixed 512 bit register, so its length does not depend on svl.
  // The macro name uses a zero (ZT0), matching the #undef below; the original
  // defined "ZTO_LEN" (letter O) and so never actually undefined it.
#define ZT0_LEN (512 / 8)
  uint8_t data[ZT0_LEN];
  for (unsigned i = 0; i < ZT0_LEN; ++i)
    data[i] = i + value_offset;

  // The instruction reads the buffer through the pointer operand, so tell the
  // compiler memory is accessed; otherwise the stores above may be treated as
  // dead and dropped or reordered.
  asm volatile("ldr zt0, [%0]" ::"r"(&data) : "memory");
#undef ZT0_LEN
}
}
void expr_disable_za() {
@ -167,21 +185,21 @@ void expr_disable_za() {
void expr_enable_za() {
SMSTART_ZA;
set_za_register(start_vl, 2);
set_sme_registers(start_vl, 2);
write_sve_regs_expr();
}
void expr_start_vl() {
prctl(PR_SME_SET_VL, start_vl);
SMSTART_ZA;
set_za_register(start_vl, 4);
set_sme_registers(start_vl, 4);
write_sve_regs_expr();
}
void expr_other_vl() {
prctl(PR_SME_SET_VL, other_vl);
SMSTART_ZA;
set_za_register(other_vl, 5);
set_sme_registers(other_vl, 5);
write_sve_regs_expr();
}
@ -209,6 +227,9 @@ int main(int argc, char *argv[]) {
start_vl = atoi(argv[3]);
other_vl = atoi(argv[4]);
if ((getauxval(AT_HWCAP2) & HWCAP2_SME2))
has_zt0 = true;
prctl(PR_SME_SET_VL, start_vl);
if (ssve)
@ -216,7 +237,7 @@ int main(int argc, char *argv[]) {
if (za) {
SMSTART_ZA;
set_za_register(start_vl, 1);
set_sme_registers(start_vl, 1);
}
write_sve_regs();