Support Armv8 CPUs without atomics instructions

Issue:
https://gitee.com/openharmony/arkcompiler_runtime_core/issues/I9W369

Reason:
Support Armv8 CPUs without atomics instructions

Description:
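Support Armv8 CPUs without atomics instructions: add an `atomics` compiler CPU feature (detected with getauxval(AT_HWCAP) & HWCAP_ATOMICS) and, when that feature is disabled on AArch64, encode the atomic byte OR as an Ldxrb/Orr/Stxrb retry loop instead of a single Stsetb.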

Tests:
ninja all tests

Signed-off-by: Sidorov Aleksei <aleksei.sidorov@huawei.com>
This commit is contained in:
Sidorov Aleksei 2024-06-07 16:29:16 +03:00
parent 7e8e5f83df
commit 33c3888e5c
19 changed files with 114 additions and 28 deletions

View File

@ -698,6 +698,7 @@ options:
- crc32
- sse42
- jscvt
- atomics
description: Set compiler CPU features
tags: [perf]
delimiter: ","

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2023 Huawei Device Co., Ltd.
* Copyright (c) 2021-2024 Huawei Device Co., Ltd.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
@ -89,6 +89,9 @@ public:
if (CpuFeaturesHasJscvt()) {
EnableCpuFeature(JSCVT);
}
if (CpuFeaturesHasAtomics()) {
EnableCpuFeature(ATOMICS);
}
break;
}
case Arch::AARCH32:

View File

@ -285,6 +285,15 @@ void Codegen::IntrinsicSaveTlabStatsSafe([[maybe_unused]] IntrinsicInst *inst, [
GetEncoder()->SetFalseResult();
}
// Emits an atomic byte OR through the encoder.
// The fast encoding is requested unless the target is AArch64 and the ATOMICS CPU feature is disabled.
void Codegen::EmitAtomicByteOr(Reg addr, Reg value)
{
    const bool atomicsUnavailable =
        GetArch() == Arch::AARCH64 && !g_options.IsCpuFeatureEnabled(CpuFeature::ATOMICS);
    GetEncoder()->EncodeAtomicByteOr(addr, value, !atomicsUnavailable);
}
#ifdef INTRINSIC_SLOW_PATH_ENTRY_ENABLED
// NOLINTNEXTLINE(readability-function-size)
void Codegen::CreateIrtocIntrinsic(IntrinsicInst *inst, [[maybe_unused]] Reg dst, [[maybe_unused]] SRCREGS src)
@ -343,7 +352,7 @@ void Codegen::CreateIrtocIntrinsic(IntrinsicInst *inst, [[maybe_unused]] Reg dst
GetEncoder()->EncodeUnsignedExtendBytesToShorts(dst, src[0]);
break;
case RuntimeInterface::IntrinsicId::INTRINSIC_ATOMIC_BYTE_OR:
GetEncoder()->EncodeAtomicByteOr(src[FIRST_OPERAND], src[SECOND_OPERAND]);
EmitAtomicByteOr(src[FIRST_OPERAND], src[SECOND_OPERAND]);
break;
default:
UNREACHABLE();

View File

@ -434,6 +434,8 @@ private:
template <typename... Args>
void FillPostWrbCallParams(MemRef mem, Args &&...params);
void EmitAtomicByteOr(Reg addr, Reg value);
private:
ArenaAllocator *allocator_;
ArenaAllocator *localAllocator_;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2023 Huawei Device Co., Ltd.
* Copyright (c) 2021-2024 Huawei Device Co., Ltd.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
@ -41,6 +41,10 @@ void CodegenNative::CreateFrameInfo()
frame->SetSetupFrame(false);
// we don't need to save FP and LR registers only for leaf methods
frame->SetSaveFrameAndLinkRegs(!GetGraph()->GetMethodProperties().IsLeaf());
// we may use lr reg as temp only if we saved lr in the prologue
if (GetTarget().SupportLinkReg()) {
GetEncoder()->EnableLrAsTempReg(frame->GetSaveFrameAndLinkRegs());
}
// we never need to save unused registers in native mode
frame->SetSaveUnusedCalleeRegs(false);
// we have to sub/add SP in prologue/epilogue in the following cases:

View File

@ -221,7 +221,8 @@ void Encoder::EncodeCompareTest([[maybe_unused]] Reg dst, [[maybe_unused]] Reg s
SetFalseResult();
}
void Encoder::EncodeAtomicByteOr([[maybe_unused]] Reg addr, [[maybe_unused]] Reg value)
// Base-class version of the atomic byte OR encoding: all three parameters are unused and
// the only action taken is a call to SetFalseResult().
void Encoder::EncodeAtomicByteOr([[maybe_unused]] Reg addr, [[maybe_unused]] Reg value,
[[maybe_unused]] bool fastEncoding)
{
SetFalseResult();
}
@ -828,7 +829,7 @@ bool Encoder::IsLrAsTempRegEnabled() const
return enableLrAsTempReg_;
}
bool Encoder::IsLrAsTempRegEnabledAndReleased()
// True only when LR is enabled as a temp register and the link register is currently
// reported as released by IsScratchRegisterReleased().
bool Encoder::IsLrAsTempRegEnabledAndReleased() const
{
    if (!IsLrAsTempRegEnabled()) {
        return false;
    }
    return IsScratchRegisterReleased(GetTarget().GetLinkReg());
}
@ -843,7 +844,7 @@ size_t Encoder::GetFrameSize() const
return frameSize_;
}
bool Encoder::IsScratchRegisterReleased([[maybe_unused]] compiler::Reg reg)
// Base-class version: ignores `reg` and always answers false.
bool Encoder::IsScratchRegisterReleased([[maybe_unused]] compiler::Reg reg) const
{
return false;
}
@ -853,6 +854,11 @@ size_t Encoder::GetScratchRegistersCount() const
return GetScratchRegistersMask().Count();
}
// Scratch register count, plus one when LR is enabled as a temp register and currently released.
size_t Encoder::GetScratchRegistersWithLrCount() const
{
    size_t total = GetScratchRegistersCount();
    if (IsLrAsTempRegEnabledAndReleased()) {
        ++total;
    }
    return total;
}
RegMask Encoder::GetScratchRegistersMask() const
{
return 0;

View File

@ -209,7 +209,7 @@ public:
// Additional check for isnan-comparison
virtual void EncodeCompare(Reg dst, Reg src0, Reg src1, Condition cc);
virtual void EncodeCompareTest(Reg dst, Reg src0, Reg src1, Condition cc);
virtual void EncodeAtomicByteOr(Reg addr, Reg value);
virtual void EncodeAtomicByteOr(Reg addr, Reg value, bool fastEncoding);
struct ArgsCompressedStringCharAt {
Reg dst;
Reg str;
@ -356,8 +356,9 @@ public:
virtual Reg AcquireScratchRegister(TypeInfo type);
virtual void AcquireScratchRegister(Reg reg);
virtual void ReleaseScratchRegister(Reg reg);
virtual bool IsScratchRegisterReleased(Reg reg);
virtual bool IsScratchRegisterReleased(Reg reg) const;
size_t GetScratchRegistersCount() const;
size_t GetScratchRegistersWithLrCount() const;
virtual RegMask GetScratchRegistersMask() const;
size_t GetScratchFPRegistersCount() const;
virtual RegMask GetScratchFpRegistersMask() const;
@ -431,7 +432,7 @@ public:
bool IsLrAsTempRegEnabled() const;
bool IsLrAsTempRegEnabledAndReleased();
bool IsLrAsTempRegEnabledAndReleased() const;
NO_COPY_SEMANTIC(Encoder);
NO_MOVE_SEMANTIC(Encoder);

View File

@ -195,7 +195,7 @@ void Aarch32Encoder::ReleaseScratchRegister(Reg reg)
}
}
bool Aarch32Encoder::IsScratchRegisterReleased(Reg reg)
bool Aarch32Encoder::IsScratchRegisterReleased(Reg reg) const
{
if (reg == GetTarget().GetLinkReg()) {
return !lrAcquired_;
@ -2972,7 +2972,7 @@ void Aarch32Encoder::EncodeCompareTest(Reg dst, Reg src0, Reg src1, Condition cc
GetMasm()->Mov(ConvertTest(cc).Negate(), VixlReg(dst), 0x0);
}
void Aarch32Encoder::EncodeAtomicByteOr([[maybe_unused]] Reg addr, [[maybe_unused]] Reg value)
void Aarch32Encoder::EncodeAtomicByteOr(Reg addr, Reg value, [[maybe_unused]] bool fastEncoding)
{
/**
* .try:

View File

@ -341,7 +341,7 @@ public:
void EncodeCompare(Reg dst, Reg src0, Reg src1, Condition cc) override;
void EncodeCompareTest(Reg dst, Reg src0, Reg src1, Condition cc) override;
void EncodeAtomicByteOr(Reg addr, Reg value) override;
void EncodeAtomicByteOr(Reg addr, Reg value, bool fastEncoding) override;
void EncodeSelect(ArgsSelect &&args) override;
void EncodeSelect(ArgsSelectImm &&args) override;
@ -357,7 +357,7 @@ public:
Reg AcquireScratchRegister(TypeInfo type) override;
void AcquireScratchRegister(Reg reg) override;
void ReleaseScratchRegister(Reg reg) override;
bool IsScratchRegisterReleased(Reg reg) override;
bool IsScratchRegisterReleased(Reg reg) const override;
RegMask GetScratchRegistersMask() const override;
RegMask GetScratchFpRegistersMask() const override;
RegMask GetAvailableScratchRegisters() const override;

View File

@ -2446,12 +2446,48 @@ void Aarch64Encoder::EncodeCompareTest(Reg dst, Reg src0, Reg src1, Condition cc
GetMasm()->Cset(VixlReg(dst), ConvertTest(cc));
}
void Aarch64Encoder::EncodeAtomicByteOr(Reg addr, Reg value)
// Encodes an atomic OR of `value` into the byte addressed by `addr`.
// With fastEncoding == true a single Stsetb is emitted; otherwise an
// Ldxrb / Orr / Stxrb sequence is emitted that retries until the exclusive store succeeds.
void Aarch64Encoder::EncodeAtomicByteOr(Reg addr, Reg value, bool fastEncoding)
{
if (fastEncoding) {
#ifndef NDEBUG
// Debug builds only: open a VIXL CPU-features scope carrying kAtomics around the Stsetb below
// (presumably so the assembler's feature checks accept the instruction -- confirm against VIXL).
vixl::CPUFeaturesScope scope(GetMasm(), vixl::CPUFeatures::kAtomics);
vixl::CPUFeaturesScope scope(GetMasm(), vixl::CPUFeatures::kAtomics);
#endif
GetMasm()->Stsetb(VixlReg(value, BYTE_SIZE), MemOperand(VixlReg(addr)));
GetMasm()->Stsetb(VixlReg(value, BYTE_SIZE), MemOperand(VixlReg(addr)));
return;
}
// Slow encoding, should not be used in production code!!!
auto linkReg = GetTarget().GetLinkReg();
auto frameReg = GetTarget().GetFrameReg();
// Stack adjustment used when the frame/link register pair has to be spilled below.
static constexpr size_t PAIR_OFFSET = 2 * DOUBLE_WORD_SIZE_BYTES;
// Lazy temporaries: nothing is acquired unless AcquireWithLr() is called in the hasTemps branch.
ScopedTmpRegLazy tmp1(this);
ScopedTmpRegLazy tmp2(this);
// orValue holds the loaded byte and the OR result; storeResult receives the Stxrb status.
Reg orValue;
Reg storeResult;
// Two temporaries are needed; LR counts when it is enabled as a temp and released.
bool hasTemps = GetScratchRegistersWithLrCount() >= 2U;
if (hasTemps) {
tmp1.AcquireWithLr();
tmp2.AcquireWithLr();
orValue = tmp1.GetReg().As(INT32_TYPE);
storeResult = tmp2.GetReg().As(INT32_TYPE);
} else {
// Not enough scratch registers: save the frame and link registers on the stack
// (pre-indexed stp) and use them as the two temporaries.
// NOTE(review): PAIR_OFFSET is size_t, so -PAIR_OFFSET wraps as an unsigned value; this relies on
// the conversion to MemOperand's offset type producing the intended negative offset -- confirm.
GetMasm()->stp(VixlReg(frameReg), VixlReg(linkReg),
MemOperand(vixl::aarch64::sp, -PAIR_OFFSET, vixl::aarch64::AddrMode::PreIndex));
orValue = frameReg.As(INT32_TYPE);
storeResult = linkReg.As(INT32_TYPE);
}
// Retry loop: exclusive byte load, OR with `value`, exclusive byte store;
// branch back to `loop` while the store status in storeResult is non-zero.
auto *loop = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
GetMasm()->Bind(loop);
GetMasm()->Ldxrb(VixlReg(orValue), MemOperand(VixlReg(addr)));
GetMasm()->Orr(VixlReg(orValue), VixlReg(orValue), VixlReg(value, WORD_SIZE));
GetMasm()->Stxrb(VixlReg(storeResult), VixlReg(orValue), MemOperand(VixlReg(addr)));
GetMasm()->Cbnz(VixlReg(storeResult), loop);
if (!hasTemps) {
// Restore the frame and link registers spilled above (post-indexed ldp).
GetMasm()->ldp(VixlReg(frameReg), VixlReg(linkReg),
MemOperand(vixl::aarch64::sp, PAIR_OFFSET, vixl::aarch64::AddrMode::PostIndex));
}
}
void Aarch64Encoder::EncodeCmp(Reg dst, Reg src0, Reg src1, Condition cc)
@ -2812,7 +2848,7 @@ void Aarch64Encoder::ReleaseScratchRegister(Reg reg)
}
}
bool Aarch64Encoder::IsScratchRegisterReleased(Reg reg)
bool Aarch64Encoder::IsScratchRegisterReleased(Reg reg) const
{
if (reg == GetTarget().GetLinkReg()) {
return !lrAcquired_;

View File

@ -265,7 +265,7 @@ public:
void EncodeCompare(Reg dst, Reg src0, Reg src1, Condition cc) override;
void EncodeCompareTest(Reg dst, Reg src0, Reg src1, Condition cc) override;
void EncodeAtomicByteOr(Reg addr, Reg value) override;
void EncodeAtomicByteOr(Reg addr, Reg value, bool fastEncoding) override;
void EncodeSelect(ArgsSelect &&args) override;
void EncodeSelect(ArgsSelectImm &&args) override;
@ -345,7 +345,7 @@ public:
Reg AcquireScratchRegister(TypeInfo type) override;
void AcquireScratchRegister(Reg reg) override;
void ReleaseScratchRegister(Reg reg) override;
bool IsScratchRegisterReleased(Reg reg) override;
bool IsScratchRegisterReleased(Reg reg) const override;
RegMask GetScratchRegistersMask() const override;
RegMask GetScratchFpRegistersMask() const override;

View File

@ -2082,7 +2082,7 @@ void Amd64Encoder::EncodeCompareTest(Reg dst, Reg src0, Reg src1, Condition cc)
GetMasm()->set(ArchCcTest(cc), ArchReg(dst, BYTE_SIZE));
}
void Amd64Encoder::EncodeAtomicByteOr(Reg addr, Reg value)
// Emits a lock-prefixed byte `or` of `value` into the memory at `addr`; fastEncoding is not used here.
void Amd64Encoder::EncodeAtomicByteOr(Reg addr, Reg value, [[maybe_unused]] bool fastEncoding)
{
    auto destination = asmjit::x86::byte_ptr(ArchReg(addr));
    auto source = ArchReg(value, ark::compiler::BYTE_SIZE);
    GetMasm()->lock().or_(destination, source);
}
@ -2871,7 +2871,7 @@ void Amd64Encoder::ReleaseScratchRegister(Reg reg)
(static_cast<Amd64RegisterDescription *>(GetRegfile()))->ReleaseScratchRegister(reg);
}
bool Amd64Encoder::IsScratchRegisterReleased(Reg reg)
// Forwards the query to the Amd64RegisterDescription returned by GetRegfile().
bool Amd64Encoder::IsScratchRegisterReleased(Reg reg) const
{
    auto *regDescription = static_cast<Amd64RegisterDescription *>(GetRegfile());
    return regDescription->IsScratchRegisterReleased(reg);
}

View File

@ -226,7 +226,7 @@ void Amd64RegisterDescription::ReleaseScratchRegister(Reg reg)
}
}
bool Amd64RegisterDescription::IsScratchRegisterReleased(Reg reg)
bool Amd64RegisterDescription::IsScratchRegisterReleased(Reg reg) const
{
if (reg.GetType().IsFloat()) {
return scratchv_.Has(reg.GetId());

View File

@ -190,7 +190,7 @@ public:
Reg AcquireScratchRegister(TypeInfo type);
void AcquireScratchRegister(Reg reg);
void ReleaseScratchRegister(Reg reg);
bool IsScratchRegisterReleased(Reg reg);
bool IsScratchRegisterReleased(Reg reg) const;
RegList GetScratchRegisters() const;
RegList GetScratchFPRegisters() const;
size_t GetScratchRegistersCount() const;
@ -298,7 +298,7 @@ public:
void EncodeCmp(Reg dst, Reg src0, Reg src1, Condition cc) override;
void EncodeCompare(Reg dst, Reg src0, Reg src1, Condition cc) override;
void EncodeCompareTest(Reg dst, Reg src0, Reg src1, Condition cc) override;
void EncodeAtomicByteOr(Reg addr, Reg value) override;
void EncodeAtomicByteOr(Reg addr, Reg value, bool fastEncoding) override;
void EncodeSelect(ArgsSelect &&args) override;
void EncodeSelect(ArgsSelectImm &&args) override;
@ -346,7 +346,7 @@ public:
Reg AcquireScratchRegister(TypeInfo type) override;
void AcquireScratchRegister(Reg reg) override;
void ReleaseScratchRegister(Reg reg) override;
bool IsScratchRegisterReleased(Reg reg) override;
bool IsScratchRegisterReleased(Reg reg) const override;
RegMask GetScratchRegistersMask() const override;
RegMask GetScratchFpRegistersMask() const override;
RegMask GetAvailableScratchRegisters() const override;

View File

@ -119,6 +119,7 @@ void CodegenFastPath::GeneratePrologue()
savedRegisters_ = GetUsedRegs() & RegMask(GetCalleeRegsMask(GetArch(), false));
if (GetTarget().SupportLinkReg() && hasRuntimeCalls) {
savedRegisters_ |= GetTarget().GetLinkReg().GetMask();
GetEncoder()->EnableLrAsTempReg(true);
}
if (GetUsedVRegs().Any()) {

View File

@ -225,7 +225,12 @@ std::string LLVMCompiler::GetCPUForArch(Arch arch)
std::string cpu;
switch (arch) {
case Arch::AARCH64:
#if defined(PANDA_TARGET_ARM64) && defined(PANDA_TARGET_LINUX)
// Avoid specifying default cortex for arm64-linux
cpu = g_options.WasSetLlvmCpu() ? g_options.GetLlvmCpu() : "";
#else
cpu = g_options.GetLlvmCpu();
#endif
break;
case Arch::X86_64:
cpu = g_options.WasSetLlvmCpu() ? g_options.GetLlvmCpu() : "";

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2022 Huawei Device Co., Ltd.
* Copyright (c) 2022-2024 Huawei Device Co., Ltd.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
@ -33,6 +33,13 @@ bool CpuFeaturesHasJscvt()
// NOLINTNEXTLINE(hicpp-signed-bitwise)
return (hwcaps & HWCAP_JSCVT) != 0;
}
// Reports whether the HWCAP_ATOMICS bit is set in the AT_HWCAP value returned by getauxval().
bool CpuFeaturesHasAtomics()
{
    const auto capabilityBits = getauxval(AT_HWCAP);
    // NOLINTNEXTLINE(hicpp-signed-bitwise)
    const bool atomicsBitSet = (capabilityBits & HWCAP_ATOMICS) != 0;
    return atomicsBitSet;
}
#elif PANDA_TARGET_WINDOWS
bool CpuFeaturesHasCrc32()
{
@ -43,6 +50,11 @@ bool CpuFeaturesHasJscvt()
{
return false;
}
// PANDA_TARGET_WINDOWS branch: the atomics feature is always reported as unavailable.
bool CpuFeaturesHasAtomics()
{
    constexpr bool ATOMICS_AVAILABLE = false;
    return ATOMICS_AVAILABLE;
}
#else
#error "Unsupported target"
#endif

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2022 Huawei Device Co., Ltd.
* Copyright (c) 2022-2024 Huawei Device Co., Ltd.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
@ -25,4 +25,9 @@ bool CpuFeaturesHasJscvt()
{
return false;
}
// This variant performs no detection and always reports the atomics feature as unavailable.
bool CpuFeaturesHasAtomics()
{
    constexpr bool ATOMICS_AVAILABLE = false;
    return ATOMICS_AVAILABLE;
}
} // namespace ark::compiler

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2022 Huawei Device Co., Ltd.
* Copyright (c) 2022-2024 Huawei Device Co., Ltd.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
@ -21,6 +21,7 @@
namespace ark::compiler {
PANDA_PUBLIC_API bool CpuFeaturesHasCrc32();
PANDA_PUBLIC_API bool CpuFeaturesHasJscvt();
PANDA_PUBLIC_API bool CpuFeaturesHasAtomics();
} // namespace ark::compiler
namespace ark {