mirror of
https://gitee.com/openharmony/arkcompiler_runtime_core
synced 2024-10-07 13:54:17 +00:00
Support Armv8 CPUs without atomics instructions
Issue: https://gitee.com/openharmony/arkcompiler_runtime_core/issues/I9W369
Reason: Support Armv8 CPUs without atomics instructions
Description: Support Armv8 CPUs without atomics instructions
Tests: ninja all tests
Signed-off-by: Sidorov Aleksei <aleksei.sidorov@huawei.com>
This commit is contained in:
parent
7e8e5f83df
commit
33c3888e5c
@ -698,6 +698,7 @@ options:
|
||||
- crc32
|
||||
- sse42
|
||||
- jscvt
|
||||
- atomics
|
||||
description: Set compiler CPU features
|
||||
tags: [perf]
|
||||
delimiter: ","
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2021-2023 Huawei Device Co., Ltd.
|
||||
* Copyright (c) 2021-2024 Huawei Device Co., Ltd.
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
@ -89,6 +89,9 @@ public:
|
||||
if (CpuFeaturesHasJscvt()) {
|
||||
EnableCpuFeature(JSCVT);
|
||||
}
|
||||
if (CpuFeaturesHasAtomics()) {
|
||||
EnableCpuFeature(ATOMICS);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Arch::AARCH32:
|
||||
|
@ -285,6 +285,15 @@ void Codegen::IntrinsicSaveTlabStatsSafe([[maybe_unused]] IntrinsicInst *inst, [
|
||||
GetEncoder()->SetFalseResult();
|
||||
}
|
||||
|
||||
// Asks the encoder to emit an atomic byte OR of `value` into the memory at `addr`.
// fastEncoding is passed as true unless the target architecture is AArch64 and the
// ATOMICS CPU feature is not enabled in g_options; in that case false is passed.
void Codegen::EmitAtomicByteOr(Reg addr, Reg value)
|
||||
{
|
||||
bool fastEncoding = true;
|
||||
// Only AArch64 without the ATOMICS feature switches to the non-fast encoding.
if (GetArch() == Arch::AARCH64 && !g_options.IsCpuFeatureEnabled(CpuFeature::ATOMICS)) {
|
||||
fastEncoding = false;
|
||||
}
|
||||
GetEncoder()->EncodeAtomicByteOr(addr, value, fastEncoding);
|
||||
}
|
||||
|
||||
#ifdef INTRINSIC_SLOW_PATH_ENTRY_ENABLED
|
||||
// NOLINTNEXTLINE(readability-function-size)
|
||||
void Codegen::CreateIrtocIntrinsic(IntrinsicInst *inst, [[maybe_unused]] Reg dst, [[maybe_unused]] SRCREGS src)
|
||||
@ -343,7 +352,7 @@ void Codegen::CreateIrtocIntrinsic(IntrinsicInst *inst, [[maybe_unused]] Reg dst
|
||||
GetEncoder()->EncodeUnsignedExtendBytesToShorts(dst, src[0]);
|
||||
break;
|
||||
case RuntimeInterface::IntrinsicId::INTRINSIC_ATOMIC_BYTE_OR:
|
||||
GetEncoder()->EncodeAtomicByteOr(src[FIRST_OPERAND], src[SECOND_OPERAND]);
|
||||
EmitAtomicByteOr(src[FIRST_OPERAND], src[SECOND_OPERAND]);
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
|
@ -434,6 +434,8 @@ private:
|
||||
template <typename... Args>
|
||||
void FillPostWrbCallParams(MemRef mem, Args &&...params);
|
||||
|
||||
void EmitAtomicByteOr(Reg addr, Reg value);
|
||||
|
||||
private:
|
||||
ArenaAllocator *allocator_;
|
||||
ArenaAllocator *localAllocator_;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2021-2023 Huawei Device Co., Ltd.
|
||||
* Copyright (c) 2021-2024 Huawei Device Co., Ltd.
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
@ -41,6 +41,10 @@ void CodegenNative::CreateFrameInfo()
|
||||
frame->SetSetupFrame(false);
|
||||
// we don't need to save FP and LR registers only for leaf methods
|
||||
frame->SetSaveFrameAndLinkRegs(!GetGraph()->GetMethodProperties().IsLeaf());
|
||||
// we may use lr reg as temp only if we saved lr in the prologue
|
||||
if (GetTarget().SupportLinkReg()) {
|
||||
GetEncoder()->EnableLrAsTempReg(frame->GetSaveFrameAndLinkRegs());
|
||||
}
|
||||
// we never need to save unused registers in native mode
|
||||
frame->SetSaveUnusedCalleeRegs(false);
|
||||
// we have to sub/add SP in prologue/epilogue in the following cases:
|
||||
|
@ -221,7 +221,8 @@ void Encoder::EncodeCompareTest([[maybe_unused]] Reg dst, [[maybe_unused]] Reg s
|
||||
SetFalseResult();
|
||||
}
|
||||
|
||||
// Base-class default for the atomic byte OR: ignores all arguments and only calls
// SetFalseResult(). The Aarch32, Aarch64 and Amd64 encoders declare overrides.
void Encoder::EncodeAtomicByteOr([[maybe_unused]] Reg addr, [[maybe_unused]] Reg value)
|
||||
void Encoder::EncodeAtomicByteOr([[maybe_unused]] Reg addr, [[maybe_unused]] Reg value,
|
||||
[[maybe_unused]] bool fastEncoding)
|
||||
{
|
||||
SetFalseResult();
|
||||
}
|
||||
@ -828,7 +829,7 @@ bool Encoder::IsLrAsTempRegEnabled() const
|
||||
return enableLrAsTempReg_;
|
||||
}
|
||||
|
||||
// Returns true only when LR is enabled as a temporary register and
// IsScratchRegisterReleased() reports the target's link register as released.
bool Encoder::IsLrAsTempRegEnabledAndReleased()
|
||||
bool Encoder::IsLrAsTempRegEnabledAndReleased() const
|
||||
{
|
||||
return IsLrAsTempRegEnabled() && IsScratchRegisterReleased(GetTarget().GetLinkReg());
|
||||
}
|
||||
@ -843,7 +844,7 @@ size_t Encoder::GetFrameSize() const
|
||||
return frameSize_;
|
||||
}
|
||||
|
||||
// Base-class default: the argument is ignored and false is always returned.
// The architecture-specific encoders declare their own overrides.
bool Encoder::IsScratchRegisterReleased([[maybe_unused]] compiler::Reg reg)
|
||||
bool Encoder::IsScratchRegisterReleased([[maybe_unused]] compiler::Reg reg) const
|
||||
{
|
||||
return false;
|
||||
}
|
||||
@ -853,6 +854,11 @@ size_t Encoder::GetScratchRegistersCount() const
|
||||
return GetScratchRegistersMask().Count();
|
||||
}
|
||||
|
||||
// Returns GetScratchRegistersCount() plus one extra when LR is enabled as a temp
// register and currently released (the bool result is cast to 0 or 1).
size_t Encoder::GetScratchRegistersWithLrCount() const
|
||||
{
|
||||
return GetScratchRegistersCount() + static_cast<size_t>(IsLrAsTempRegEnabledAndReleased());
|
||||
}
|
||||
|
||||
RegMask Encoder::GetScratchRegistersMask() const
|
||||
{
|
||||
return 0;
|
||||
|
@ -209,7 +209,7 @@ public:
|
||||
// Additional check for isnan-comparison
|
||||
virtual void EncodeCompare(Reg dst, Reg src0, Reg src1, Condition cc);
|
||||
virtual void EncodeCompareTest(Reg dst, Reg src0, Reg src1, Condition cc);
|
||||
virtual void EncodeAtomicByteOr(Reg addr, Reg value);
|
||||
virtual void EncodeAtomicByteOr(Reg addr, Reg value, bool fastEncoding);
|
||||
struct ArgsCompressedStringCharAt {
|
||||
Reg dst;
|
||||
Reg str;
|
||||
@ -356,8 +356,9 @@ public:
|
||||
virtual Reg AcquireScratchRegister(TypeInfo type);
|
||||
virtual void AcquireScratchRegister(Reg reg);
|
||||
virtual void ReleaseScratchRegister(Reg reg);
|
||||
virtual bool IsScratchRegisterReleased(Reg reg);
|
||||
virtual bool IsScratchRegisterReleased(Reg reg) const;
|
||||
size_t GetScratchRegistersCount() const;
|
||||
size_t GetScratchRegistersWithLrCount() const;
|
||||
virtual RegMask GetScratchRegistersMask() const;
|
||||
size_t GetScratchFPRegistersCount() const;
|
||||
virtual RegMask GetScratchFpRegistersMask() const;
|
||||
@ -431,7 +432,7 @@ public:
|
||||
|
||||
bool IsLrAsTempRegEnabled() const;
|
||||
|
||||
bool IsLrAsTempRegEnabledAndReleased();
|
||||
bool IsLrAsTempRegEnabledAndReleased() const;
|
||||
NO_COPY_SEMANTIC(Encoder);
|
||||
NO_MOVE_SEMANTIC(Encoder);
|
||||
|
||||
|
@ -195,7 +195,7 @@ void Aarch32Encoder::ReleaseScratchRegister(Reg reg)
|
||||
}
|
||||
}
|
||||
|
||||
bool Aarch32Encoder::IsScratchRegisterReleased(Reg reg)
|
||||
bool Aarch32Encoder::IsScratchRegisterReleased(Reg reg) const
|
||||
{
|
||||
if (reg == GetTarget().GetLinkReg()) {
|
||||
return !lrAcquired_;
|
||||
@ -2972,7 +2972,7 @@ void Aarch32Encoder::EncodeCompareTest(Reg dst, Reg src0, Reg src1, Condition cc
|
||||
GetMasm()->Mov(ConvertTest(cc).Negate(), VixlReg(dst), 0x0);
|
||||
}
|
||||
|
||||
void Aarch32Encoder::EncodeAtomicByteOr([[maybe_unused]] Reg addr, [[maybe_unused]] Reg value)
|
||||
void Aarch32Encoder::EncodeAtomicByteOr(Reg addr, Reg value, [[maybe_unused]] bool fastEncoding)
|
||||
{
|
||||
/**
|
||||
* .try:
|
||||
|
@ -341,7 +341,7 @@ public:
|
||||
|
||||
void EncodeCompare(Reg dst, Reg src0, Reg src1, Condition cc) override;
|
||||
void EncodeCompareTest(Reg dst, Reg src0, Reg src1, Condition cc) override;
|
||||
void EncodeAtomicByteOr(Reg addr, Reg value) override;
|
||||
void EncodeAtomicByteOr(Reg addr, Reg value, bool fastEncoding) override;
|
||||
|
||||
void EncodeSelect(ArgsSelect &&args) override;
|
||||
void EncodeSelect(ArgsSelectImm &&args) override;
|
||||
@ -357,7 +357,7 @@ public:
|
||||
Reg AcquireScratchRegister(TypeInfo type) override;
|
||||
void AcquireScratchRegister(Reg reg) override;
|
||||
void ReleaseScratchRegister(Reg reg) override;
|
||||
bool IsScratchRegisterReleased(Reg reg) override;
|
||||
bool IsScratchRegisterReleased(Reg reg) const override;
|
||||
RegMask GetScratchRegistersMask() const override;
|
||||
RegMask GetScratchFpRegistersMask() const override;
|
||||
RegMask GetAvailableScratchRegisters() const override;
|
||||
|
@ -2446,12 +2446,48 @@ void Aarch64Encoder::EncodeCompareTest(Reg dst, Reg src0, Reg src1, Condition cc
|
||||
GetMasm()->Cset(VixlReg(dst), ConvertTest(cc));
|
||||
}
|
||||
|
||||
// AArch64 implementation of the atomic byte OR of `value` into the byte at `addr`.
// With fastEncoding a single Stsetb instruction is emitted (the kAtomics
// CPUFeaturesScope is only created when NDEBUG is not defined). Otherwise an
// exclusive-load / OR / exclusive-store retry loop is emitted instead.
void Aarch64Encoder::EncodeAtomicByteOr(Reg addr, Reg value)
|
||||
void Aarch64Encoder::EncodeAtomicByteOr(Reg addr, Reg value, bool fastEncoding)
|
||||
{
|
||||
if (fastEncoding) {
|
||||
#ifndef NDEBUG
|
||||
vixl::CPUFeaturesScope scope(GetMasm(), vixl::CPUFeatures::kAtomics);
|
||||
vixl::CPUFeaturesScope scope(GetMasm(), vixl::CPUFeatures::kAtomics);
|
||||
#endif
|
||||
GetMasm()->Stsetb(VixlReg(value, BYTE_SIZE), MemOperand(VixlReg(addr)));
|
||||
GetMasm()->Stsetb(VixlReg(value, BYTE_SIZE), MemOperand(VixlReg(addr)));
|
||||
return;
|
||||
}
|
||||
|
||||
// Slow encoding, should not be used in production code!!!
|
||||
auto linkReg = GetTarget().GetLinkReg();
|
||||
auto frameReg = GetTarget().GetFrameReg();
|
||||
// Size of the stack slot used when the frame/link register pair has to be spilled below.
static constexpr size_t PAIR_OFFSET = 2 * DOUBLE_WORD_SIZE_BYTES;
|
||||
|
||||
ScopedTmpRegLazy tmp1(this);
|
||||
ScopedTmpRegLazy tmp2(this);
|
||||
// orValue holds the loaded byte OR-ed with `value`; storeResult receives the Stxrb status.
Reg orValue;
|
||||
Reg storeResult;
|
||||
// Two working registers are needed; the count includes LR when it is enabled as a
// temp register and currently released.
bool hasTemps = GetScratchRegistersWithLrCount() >= 2U;
|
||||
if (hasTemps) {
|
||||
tmp1.AcquireWithLr();
|
||||
tmp2.AcquireWithLr();
|
||||
orValue = tmp1.GetReg().As(INT32_TYPE);
|
||||
storeResult = tmp2.GetReg().As(INT32_TYPE);
|
||||
} else {
|
||||
// Not enough temporaries: store the frame/link register pair below SP (pre-indexed)
// and borrow both registers; they are reloaded after the loop.
// NOTE(review): PAIR_OFFSET is size_t, so -PAIR_OFFSET is an unsigned negation that
// relies on conversion back to a signed offset — confirm MemOperand's offset type.
GetMasm()->stp(VixlReg(frameReg), VixlReg(linkReg),
|
||||
MemOperand(vixl::aarch64::sp, -PAIR_OFFSET, vixl::aarch64::AddrMode::PreIndex));
|
||||
orValue = frameReg.As(INT32_TYPE);
|
||||
storeResult = linkReg.As(INT32_TYPE);
|
||||
}
|
||||
|
||||
// Retry loop: Ldxrb loads the byte exclusively, Orr merges in `value`, Stxrb attempts
// the exclusive store, and Cbnz branches back to the label while storeResult is non-zero.
auto *loop = static_cast<Aarch64LabelHolder *>(GetLabels())->GetLabel(CreateLabel());
|
||||
GetMasm()->Bind(loop);
|
||||
GetMasm()->Ldxrb(VixlReg(orValue), MemOperand(VixlReg(addr)));
|
||||
GetMasm()->Orr(VixlReg(orValue), VixlReg(orValue), VixlReg(value, WORD_SIZE));
|
||||
GetMasm()->Stxrb(VixlReg(storeResult), VixlReg(orValue), MemOperand(VixlReg(addr)));
|
||||
GetMasm()->Cbnz(VixlReg(storeResult), loop);
|
||||
// Reload the borrowed frame/link register pair (post-indexed), undoing the spill above.
if (!hasTemps) {
|
||||
GetMasm()->ldp(VixlReg(frameReg), VixlReg(linkReg),
|
||||
MemOperand(vixl::aarch64::sp, PAIR_OFFSET, vixl::aarch64::AddrMode::PostIndex));
|
||||
}
|
||||
}
|
||||
|
||||
void Aarch64Encoder::EncodeCmp(Reg dst, Reg src0, Reg src1, Condition cc)
|
||||
@ -2812,7 +2848,7 @@ void Aarch64Encoder::ReleaseScratchRegister(Reg reg)
|
||||
}
|
||||
}
|
||||
|
||||
bool Aarch64Encoder::IsScratchRegisterReleased(Reg reg)
|
||||
bool Aarch64Encoder::IsScratchRegisterReleased(Reg reg) const
|
||||
{
|
||||
if (reg == GetTarget().GetLinkReg()) {
|
||||
return !lrAcquired_;
|
||||
|
@ -265,7 +265,7 @@ public:
|
||||
|
||||
void EncodeCompare(Reg dst, Reg src0, Reg src1, Condition cc) override;
|
||||
void EncodeCompareTest(Reg dst, Reg src0, Reg src1, Condition cc) override;
|
||||
void EncodeAtomicByteOr(Reg addr, Reg value) override;
|
||||
void EncodeAtomicByteOr(Reg addr, Reg value, bool fastEncoding) override;
|
||||
|
||||
void EncodeSelect(ArgsSelect &&args) override;
|
||||
void EncodeSelect(ArgsSelectImm &&args) override;
|
||||
@ -345,7 +345,7 @@ public:
|
||||
Reg AcquireScratchRegister(TypeInfo type) override;
|
||||
void AcquireScratchRegister(Reg reg) override;
|
||||
void ReleaseScratchRegister(Reg reg) override;
|
||||
bool IsScratchRegisterReleased(Reg reg) override;
|
||||
bool IsScratchRegisterReleased(Reg reg) const override;
|
||||
|
||||
RegMask GetScratchRegistersMask() const override;
|
||||
RegMask GetScratchFpRegistersMask() const override;
|
||||
|
@ -2082,7 +2082,7 @@ void Amd64Encoder::EncodeCompareTest(Reg dst, Reg src0, Reg src1, Condition cc)
|
||||
GetMasm()->set(ArchCcTest(cc), ArchReg(dst, BYTE_SIZE));
|
||||
}
|
||||
|
||||
// Amd64 implementation: emits a lock-prefixed OR of the byte-sized `value` register
// into the byte at the address held in `addr`. fastEncoding is not used here.
void Amd64Encoder::EncodeAtomicByteOr(Reg addr, Reg value)
|
||||
void Amd64Encoder::EncodeAtomicByteOr(Reg addr, Reg value, [[maybe_unused]] bool fastEncoding)
|
||||
{
|
||||
GetMasm()->lock().or_(asmjit::x86::byte_ptr(ArchReg(addr)), ArchReg(value, ark::compiler::BYTE_SIZE));
|
||||
}
|
||||
@ -2871,7 +2871,7 @@ void Amd64Encoder::ReleaseScratchRegister(Reg reg)
|
||||
(static_cast<Amd64RegisterDescription *>(GetRegfile()))->ReleaseScratchRegister(reg);
|
||||
}
|
||||
|
||||
// Forwards the query to the Amd64RegisterDescription obtained by casting GetRegfile().
bool Amd64Encoder::IsScratchRegisterReleased(Reg reg)
|
||||
bool Amd64Encoder::IsScratchRegisterReleased(Reg reg) const
|
||||
{
|
||||
return (static_cast<Amd64RegisterDescription *>(GetRegfile()))->IsScratchRegisterReleased(reg);
|
||||
}
|
||||
|
@ -226,7 +226,7 @@ void Amd64RegisterDescription::ReleaseScratchRegister(Reg reg)
|
||||
}
|
||||
}
|
||||
|
||||
bool Amd64RegisterDescription::IsScratchRegisterReleased(Reg reg)
|
||||
bool Amd64RegisterDescription::IsScratchRegisterReleased(Reg reg) const
|
||||
{
|
||||
if (reg.GetType().IsFloat()) {
|
||||
return scratchv_.Has(reg.GetId());
|
||||
|
@ -190,7 +190,7 @@ public:
|
||||
Reg AcquireScratchRegister(TypeInfo type);
|
||||
void AcquireScratchRegister(Reg reg);
|
||||
void ReleaseScratchRegister(Reg reg);
|
||||
bool IsScratchRegisterReleased(Reg reg);
|
||||
bool IsScratchRegisterReleased(Reg reg) const;
|
||||
RegList GetScratchRegisters() const;
|
||||
RegList GetScratchFPRegisters() const;
|
||||
size_t GetScratchRegistersCount() const;
|
||||
@ -298,7 +298,7 @@ public:
|
||||
void EncodeCmp(Reg dst, Reg src0, Reg src1, Condition cc) override;
|
||||
void EncodeCompare(Reg dst, Reg src0, Reg src1, Condition cc) override;
|
||||
void EncodeCompareTest(Reg dst, Reg src0, Reg src1, Condition cc) override;
|
||||
void EncodeAtomicByteOr(Reg addr, Reg value) override;
|
||||
void EncodeAtomicByteOr(Reg addr, Reg value, bool fastEncoding) override;
|
||||
|
||||
void EncodeSelect(ArgsSelect &&args) override;
|
||||
void EncodeSelect(ArgsSelectImm &&args) override;
|
||||
@ -346,7 +346,7 @@ public:
|
||||
Reg AcquireScratchRegister(TypeInfo type) override;
|
||||
void AcquireScratchRegister(Reg reg) override;
|
||||
void ReleaseScratchRegister(Reg reg) override;
|
||||
bool IsScratchRegisterReleased(Reg reg) override;
|
||||
bool IsScratchRegisterReleased(Reg reg) const override;
|
||||
RegMask GetScratchRegistersMask() const override;
|
||||
RegMask GetScratchFpRegistersMask() const override;
|
||||
RegMask GetAvailableScratchRegisters() const override;
|
||||
|
@ -119,6 +119,7 @@ void CodegenFastPath::GeneratePrologue()
|
||||
savedRegisters_ = GetUsedRegs() & RegMask(GetCalleeRegsMask(GetArch(), false));
|
||||
if (GetTarget().SupportLinkReg() && hasRuntimeCalls) {
|
||||
savedRegisters_ |= GetTarget().GetLinkReg().GetMask();
|
||||
GetEncoder()->EnableLrAsTempReg(true);
|
||||
}
|
||||
|
||||
if (GetUsedVRegs().Any()) {
|
||||
|
@ -225,7 +225,12 @@ std::string LLVMCompiler::GetCPUForArch(Arch arch)
|
||||
std::string cpu;
|
||||
switch (arch) {
|
||||
case Arch::AARCH64:
|
||||
#if defined(PANDA_TARGET_ARM64) && defined(PANDA_TARGET_LINUX)
|
||||
// Avoid specifying default cortex for arm64-linux
|
||||
cpu = g_options.WasSetLlvmCpu() ? g_options.GetLlvmCpu() : "";
|
||||
#else
|
||||
cpu = g_options.GetLlvmCpu();
|
||||
#endif
|
||||
break;
|
||||
case Arch::X86_64:
|
||||
cpu = g_options.WasSetLlvmCpu() ? g_options.GetLlvmCpu() : "";
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2022 Huawei Device Co., Ltd.
|
||||
* Copyright (c) 2022-2024 Huawei Device Co., Ltd.
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
@ -33,6 +33,13 @@ bool CpuFeaturesHasJscvt()
|
||||
// NOLINTNEXTLINE(hicpp-signed-bitwise)
|
||||
return (hwcaps & HWCAP_JSCVT) != 0;
|
||||
}
|
||||
|
||||
// Returns true when the HWCAP_ATOMICS bit is set in the AT_HWCAP value
// reported by getauxval().
bool CpuFeaturesHasAtomics()
|
||||
{
|
||||
auto hwcaps = getauxval(AT_HWCAP);
|
||||
// NOLINTNEXTLINE(hicpp-signed-bitwise)
|
||||
return (hwcaps & HWCAP_ATOMICS) != 0;
|
||||
}
|
||||
#elif PANDA_TARGET_WINDOWS
|
||||
bool CpuFeaturesHasCrc32()
|
||||
{
|
||||
@ -43,6 +50,11 @@ bool CpuFeaturesHasJscvt()
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
// Stub variant: unconditionally returns false, so the atomics feature is
// reported as absent and no detection is performed.
bool CpuFeaturesHasAtomics()
|
||||
{
|
||||
return false;
|
||||
}
|
||||
#else
|
||||
#error "Unsupported target"
|
||||
#endif
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2022 Huawei Device Co., Ltd.
|
||||
* Copyright (c) 2022-2024 Huawei Device Co., Ltd.
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
@ -25,4 +25,9 @@ bool CpuFeaturesHasJscvt()
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
// No feature detection in this variant: always returns false.
bool CpuFeaturesHasAtomics()
|
||||
{
|
||||
return false;
|
||||
}
|
||||
} // namespace ark::compiler
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2022 Huawei Device Co., Ltd.
|
||||
* Copyright (c) 2022-2024 Huawei Device Co., Ltd.
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
@ -21,6 +21,7 @@
|
||||
namespace ark::compiler {
|
||||
PANDA_PUBLIC_API bool CpuFeaturesHasCrc32();
|
||||
PANDA_PUBLIC_API bool CpuFeaturesHasJscvt();
|
||||
PANDA_PUBLIC_API bool CpuFeaturesHasAtomics();
|
||||
} // namespace ark::compiler
|
||||
|
||||
namespace ark {
|
||||
|
Loading…
Reference in New Issue
Block a user