Nikolas Klauser 83ce139721 [libc++] Add hide_from_abi check for classes
We already have a clang-tidy check for making sure that `_LIBCPP_HIDE_FROM_ABI` is on free functions. This patch extends this to class members. The places where we don't check for `_LIBCPP_HIDE_FROM_ABI` are classes for which we have an instantiation in the library.

Reviewed By: ldionne, Mordante, #libc

Spies: jplehr, mikhail.ramalho, sstefan1, libcxx-commits, krytarowski, miyuki, smeenai

Differential Revision: https://reviews.llvm.org/D142332
2023-04-16 15:23:23 +02:00

346 lines
11 KiB

// -*- C++ -*-
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
barrier synopsis
namespace std
template<class CompletionFunction = see below>
class barrier
using arrival_token = see below;
static constexpr ptrdiff_t max() noexcept;
constexpr explicit barrier(ptrdiff_t phase_count,
CompletionFunction f = CompletionFunction());
barrier(const barrier&) = delete;
barrier& operator=(const barrier&) = delete;
[[nodiscard]] arrival_token arrive(ptrdiff_t update = 1);
void wait(arrival_token&& arrival) const;
void arrive_and_wait();
void arrive_and_drop();
CompletionFunction completion; // exposition only
#include <__assert> // all public C++ headers provide the assertion handler
#include <__atomic/atomic_base.h>
#include <__atomic/memory_order.h>
#include <__availability>
#include <__config>
#include <__memory/unique_ptr.h>
#include <__thread/poll_with_backoff.h>
#include <__thread/timed_backoff_policy.h>
#include <__utility/move.h>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <version>
# pragma GCC system_header
# error "<barrier> is not supported since libc++ has been configured without support for threads."
#include <__undef_macros>
#if _LIBCPP_STD_VER >= 14
// Completion-function type with a call operator that takes no arguments and is
// declared noexcept. It is the default _CompletionF of the barrier class
// template declared later in this file.
// NOTE(review): the operator's body is not visible in this view; presumably it
// does nothing -- confirm against the full header.
struct __empty_completion
void operator()() noexcept
The default implementation of __barrier_base is a classic tree barrier.
It looks different from literature pseudocode for two main reasons:
1. Threads that call into std::barrier functions do not provide indices,
so a numbering step is added before the actual barrier algorithm,
appearing as an N+1 round to the N rounds of the tree barrier.
2. A great deal of attention has been paid to avoid cache line thrashing
by flattening the tree structure into cache-line sized arrays, that
are indexed in an efficient way.
// Integer type used for the phase counter of the tree-barrier implementation;
// it is also that implementation's arrival_token.
using __barrier_phase_t = uint8_t;
// Algorithm state of the tree barrier. Only forward-declared here, so the
// header treats it as an opaque type manipulated through the functions below.
class __barrier_algorithm_base;
// Creates the algorithm state. __expected is taken by non-const reference, so
// the callee is able to modify the caller's expected count.
__barrier_algorithm_base* __construct_barrier_algorithm_base(ptrdiff_t& __expected);
// Registers a single arrival for the phase __old_phase. The caller in
// __barrier_base::arrive treats a true result as "this arrival completed the
// phase" and then advances the phase counter.
bool __arrive_barrier_algorithm_base(__barrier_algorithm_base* __barrier,
__barrier_phase_t __old_phase);
// Presumably releases the state obtained from
// __construct_barrier_algorithm_base -- the definition is not in this header.
void __destroy_barrier_algorithm_base(__barrier_algorithm_base* __barrier);
// Tree-barrier flavour of __barrier_base: the bookkeeping lives in an opaque
// __barrier_algorithm_base object and this class tracks the expected count and
// the phase counter around it.
// NOTE(review): braces, access specifiers and some lines of this class are not
// visible in this view; the comments below describe only what is shown.
template<class _CompletionF>
class __barrier_base {
// Expected arrival count; handed by reference to
// __construct_barrier_algorithm_base and adjusted when a phase completes.
ptrdiff_t __expected_;
// Handle to the algorithm state, accessed through __base_.get() in arrive().
// NOTE(review): the beginning of this declaration is missing from this view.
void (*)(__barrier_algorithm_base*)> __base_;
// Pending change to __expected_: decremented by arrive_and_drop() and folded
// into __expected_ (then reset to 0) by the arrival that completes a phase.
__atomic_base<ptrdiff_t> __expected_adjustment_;
// Stored completion function, move-constructed from the constructor argument.
_CompletionF __completion_;
// Current phase; advanced by 2 each time a phase completes.
__atomic_base<__barrier_phase_t> __phase_;
using arrival_token = __barrier_phase_t;
// Largest value reported as supported: the maximum of ptrdiff_t.
static _LIBCPP_HIDE_FROM_ABI constexpr ptrdiff_t max() noexcept {
return numeric_limits<ptrdiff_t>::max();
// Initialises the expected count, creates the algorithm state, and starts at
// phase 0 with no pending adjustment.
__barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF())
: __expected_(__expected), __base_(std::__construct_barrier_algorithm_base(this->__expected_),
__expected_adjustment_(0), __completion_(std::move(__completion)), __phase_(0)
// Registers __update arrivals, one call to __arrive_barrier_algorithm_base per
// arrival, and returns the phase that was observed on entry.
arrival_token arrive(ptrdiff_t __update)
auto const __old_phase = __phase_.load(memory_order_relaxed);
for(; __update; --__update)
if(__arrive_barrier_algorithm_base(__base_.get(), __old_phase)) {
// This arrival completed the phase: apply and clear the pending adjustment
// before publishing the new phase with a release store.
__expected_ += __expected_adjustment_.load(memory_order_relaxed);
__expected_adjustment_.store(0, memory_order_relaxed);
__phase_.store(__old_phase + 2, memory_order_release);
return __old_phase;
// Polls with a timed backoff policy until the phase, loaded with acquire
// ordering, differs from the token's phase.
void wait(arrival_token&& __old_phase) const
auto const __test_fn = [this, __old_phase]() -> bool {
return __phase_.load(memory_order_acquire) != __old_phase;
std::__libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy());
// Records that one fewer arrival is expected; the change reaches __expected_
// when a phase completes (see arrive()).
void arrive_and_drop()
__expected_adjustment_.fetch_sub(1, memory_order_relaxed);
The alternative implementation of __barrier_base is a central barrier.
Two versions of this algorithm are provided:
1. A fairly straightforward implementation of the literature for the
general case where the completion function is not empty.
2. An optimized implementation that exploits 2's complement arithmetic
and well-defined overflow in atomic arithmetic, to handle the phase
roll-over for free.
// Central-barrier flavour of __barrier_base for an arbitrary completion
// function: a single atomic counter of remaining arrivals plus a boolean phase.
// NOTE(review): braces, access specifiers and some lines of this class are not
// visible in this view; the comments below describe only what is shown.
template<class _CompletionF>
class __barrier_base {
// Number of arrivals required per phase; decremented by arrive_and_drop().
__atomic_base<ptrdiff_t> __expected;
// Arrivals still outstanding in the current phase; reloaded from __expected
// when it reaches zero.
__atomic_base<ptrdiff_t> __arrived;
// Stored completion function, move-constructed from the constructor argument.
_CompletionF __completion;
// Current phase; toggled each time a phase completes.
__atomic_base<bool> __phase;
using arrival_token = bool;
// Largest value reported as supported: the maximum of ptrdiff_t.
static constexpr ptrdiff_t max() noexcept {
return numeric_limits<ptrdiff_t>::max();
// Both counters start at the constructor argument; the phase starts as false.
// The parameters share their names with the members they initialise.
__barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF())
: __expected(__expected), __arrived(__expected), __completion(std::move(__completion)), __phase(false)
// Subtracts update from the outstanding count and returns the phase observed
// on entry.
// NOTE(review): `update` and `new_expected` lack the `__` prefix used by the
// other identifiers in this file -- confirm whether that is intentional.
arrival_token arrive(ptrdiff_t update)
auto const __old_phase = __phase.load(memory_order_relaxed);
// fetch_sub yields the previous value, so subtract again to get the new count.
auto const __result = __arrived.fetch_sub(update, memory_order_acq_rel) - update;
auto const new_expected = __expected.load(memory_order_relaxed);
if(0 == __result) {
// Last arrival of the phase: reload the counter, then publish the flipped
// phase with a release store.
__arrived.store(new_expected, memory_order_relaxed);
__phase.store(!__old_phase, memory_order_release);
return __old_phase;
// Waits on the phase atomic with acquire ordering, passing the token's phase as
// the value to wait on (presumably blocks until the phase differs from it).
void wait(arrival_token&& __old_phase) const
__phase.wait(__old_phase, memory_order_acquire);
// Lowers the number of arrivals required for subsequent reloads of __arrived.
void arrive_and_drop()
__expected.fetch_sub(1, memory_order_relaxed);
// Central-barrier flavour for __empty_completion: phase, outstanding arrivals
// and expected count are packed into one 64-bit atomic so that a single
// fetch_add both counts an arrival and, on overflow of the arrival field,
// flips the phase bit.
// NOTE(review): braces, access specifiers and some lines of this class are not
// visible in this view; the comments below describe only what is shown.
class __barrier_base<__empty_completion> {
// Layout of __phase_arrived_expected, as given by the constants below:
//   bits  0..31  expected field (__expected_mask), one unit = __expected_unit
//   bits 32..62  arrived field  (__arrived_mask),  one unit = __arrived_unit
//   bit  63      phase          (__phase_bit)
static constexpr uint64_t __expected_unit = 1ull;
static constexpr uint64_t __arrived_unit = 1ull << 32;
static constexpr uint64_t __expected_mask = __arrived_unit - 1;
static constexpr uint64_t __phase_bit = 1ull << 63;
static constexpr uint64_t __arrived_mask = (__phase_bit - 1) & ~__expected_mask;
__atomic_base<uint64_t> __phase_arrived_expected;
// Builds the initial packed state: both the arrived field and the expected
// field hold (2^31 - __count), so __count additions of __arrived_unit carry
// the arrived field into the phase bit.
constexpr uint64_t __init(ptrdiff_t __count) _NOEXCEPT
return ((uint64_t(1u << 31) - __count) << 32)
| (uint64_t(1u << 31) - __count);
using arrival_token = uint64_t;
// Largest value reported as supported: 2^31 - 1.
static constexpr ptrdiff_t max() noexcept {
return ptrdiff_t(1u << 31) - 1;
// The completion argument is unnamed and unused; only the packed state is set.
explicit inline __barrier_base(ptrdiff_t __count, __empty_completion = __empty_completion())
: __phase_arrived_expected(__init(__count))
// Adds update arrival units to the packed state and returns the phase bit that
// was observed before the addition.
arrival_token arrive(ptrdiff_t update)
auto const __inc = __arrived_unit * update;
auto const __old = __phase_arrived_expected.fetch_add(__inc, memory_order_acq_rel);
// The phase bit differs between the old and new value exactly when this
// addition carried out of the arrived field, i.e. the phase just completed.
if((__old ^ (__old + __inc)) & __phase_bit) {
// Re-arm the arrived field for the next phase by adding the expected field,
// shifted into the arrived position.
__phase_arrived_expected.fetch_add((__old & __expected_mask) << 32, memory_order_relaxed);
return __old & __phase_bit;
// Polls with a timed backoff policy until the phase bit of the packed state,
// loaded with acquire ordering, differs from the token.
void wait(arrival_token&& __phase) const
auto const __test_fn = [=]() -> bool {
uint64_t const __current = __phase_arrived_expected.load(memory_order_acquire);
return ((__current & __phase_bit) != __phase);
__libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy());
// Adds one unit to the expected field. Because __init stores (2^31 - count),
// a larger stored value corresponds to one fewer expected arrival.
void arrive_and_drop()
__phase_arrived_expected.fetch_add(__expected_unit, memory_order_relaxed);
// Public barrier class template. It owns a __barrier_base<_CompletionF> and
// forwards to it; _CompletionF defaults to __empty_completion.
// NOTE(review): braces, access specifiers and the bodies of wait(),
// arrive_and_wait() and arrive_and_drop() are not visible in this view.
template<class _CompletionF = __empty_completion>
class barrier {
// Implementation object that all visible members delegate to.
__barrier_base<_CompletionF> __b_;
// Token type is whatever the selected implementation defines.
using arrival_token = typename __barrier_base<_CompletionF>::arrival_token;
// Forwards the limit reported by the selected implementation.
static _LIBCPP_HIDE_FROM_ABI constexpr ptrdiff_t max() noexcept {
return __barrier_base<_CompletionF>::max();
// Passes the count and the (moved) completion function to the implementation.
barrier(ptrdiff_t __count, _CompletionF __completion = _CompletionF())
: __b_(__count, _VSTD::move(__completion)) {
// Copy construction and copy assignment are deleted.
barrier(barrier const&) = delete;
barrier& operator=(barrier const&) = delete;
// Registers __update arrivals (one by default) and returns the implementation's
// arrival token.
arrival_token arrive(ptrdiff_t __update = 1)
return __b_.arrive(__update);
void wait(arrival_token&& __phase) const
void arrive_and_wait()
void arrive_and_drop()
#endif // _LIBCPP_STD_VER >= 14
# include <atomic>
# include <concepts>
# include <iterator>
# include <memory>
# include <stdexcept>
# include <variant>