mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-11-13 23:17:57 +00:00
896 lines
31 KiB
C++
896 lines
31 KiB
C++
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
|
/* This Source Code Form is subject to the terms of the Mozilla Public
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
|
|
|
/*
|
|
* Implements (almost always) lock-free atomic operations. The operations here
|
|
* are a subset of that which can be found in C++11's <atomic> header, with a
|
|
* different API to enforce consistent memory ordering constraints.
|
|
*
|
|
* Anyone caught using |volatile| for inter-thread memory safety needs to be
|
|
* sent a copy of this header and the C++11 standard.
|
|
*/
|
|
|
|
#ifndef mozilla_Atomics_h_
|
|
#define mozilla_Atomics_h_
|
|
|
|
#include "mozilla/Assertions.h"
|
|
#include "mozilla/TypeTraits.h"
|
|
|
|
#include <stdint.h>
|
|
|
|
/*
 * Decide whether the C++11 <atomic> header may be used, and define
 * MOZ_HAVE_CXX11_ATOMICS if so.
 *
 * clang: our minimum deployment target on OS X is 10.6, whose SDK has no
 * <atomic>, so being in C++11 (or C++0x) mode is not enough -- the header
 * itself must be present as well.
 */
#if defined(__clang__)
#  if (defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L) && \
      __has_include(<atomic>)
#    define MOZ_HAVE_CXX11_ATOMICS
#  endif
/*
 * GCC: Android is excluded because it uses a different C++ standard library
 * that does not provide <atomic>.  Elsewhere GCC 4.5 or newer in C++11
 * (or C++0x) mode is required.
 */
#elif defined(__GNUC__) && !defined(__ANDROID__)
#  include "mozilla/Compiler.h"
#  if MOZ_GCC_VERSION_AT_LEAST(4, 5, 0) && \
      (__cplusplus >= 201103L || defined(__GXX_EXPERIMENTAL_CXX0X__))
#    define MOZ_HAVE_CXX11_ATOMICS
#  endif
/* MSVC: any compiler with _MSC_VER of 1700 or above qualifies. */
#elif defined(_MSC_VER) && _MSC_VER >= 1700
#  define MOZ_HAVE_CXX11_ATOMICS
#endif
|
|
|
|
namespace mozilla {
|
|
|
|
/**
|
|
* An enum of memory ordering possibilities for atomics.
|
|
*
|
|
* Memory ordering is the observable state of distinct values in memory.
|
|
* (It's a separate concept from atomicity, which concerns whether an
|
|
* operation can ever be observed in an intermediate state. Don't
|
|
* conflate the two!) Given a sequence of operations in source code on
|
|
* memory, it is *not* always the case that, at all times and on all
|
|
* cores, those operations will appear to have occurred in that exact
|
|
* sequence. First, the compiler might reorder that sequence, if it
|
|
* thinks another ordering will be more efficient. Second, the CPU may
|
|
* not expose so consistent a view of memory. CPUs will often perform
|
|
* their own instruction reordering, above and beyond that performed by
|
|
* the compiler. And each core has its own memory caches, and accesses
|
|
* (reads and writes both) to "memory" may only resolve to out-of-date
|
|
* cache entries -- not to the "most recently" performed operation in
|
|
* some global sense. Any access to a value that may be used by
|
|
* multiple threads, potentially across multiple cores, must therefore
|
|
* have a memory ordering imposed on it, for all code on all
|
|
* threads/cores to have a sufficiently coherent worldview.
|
|
*
|
|
* http://gcc.gnu.org/wiki/Atomic/GCCMM/AtomicSync and
|
|
* http://en.cppreference.com/w/cpp/atomic/memory_order go into more
|
|
* detail on all this, including examples of how each mode works.
|
|
*
|
|
* Note that for simplicity and practicality, not all of the modes in
|
|
* C++11 are supported. The missing C++11 modes are either subsumed by
|
|
* the modes we provide below, or not relevant for the CPUs we support
|
|
* in Gecko. These three modes are confusing enough as it is!
|
|
*/
|
|
enum MemoryOrdering {
  /*
   * Relaxed: no ordering at all.  Observing the result of a relaxed write
   * tells you nothing about any other memory: writes that the writing
   * thread performed "before" it may not be visible yet, and writes it
   * performed "after" it may already be visible if the compiler or CPU
   * reordered them (for instance because reads and/or writes were held up
   * in per-processor caches).  Operations may always use cached values, as
   * long as the updates to the atomic values themselves eventually happen
   * correctly, which usually makes this the fastest kind of atomic
   * access -- and, for the same reason, *the most dangerous kind*.
   *
   * It suits things like process-wide statistics counters that need not be
   * consistent with anything else, provided the updates are atomic and
   * every observer can tolerate an out-of-date value (if you need an
   * up-to-date value, you need some other synchronizing operation).  It is
   * *not* suitable for locks, mutexes, reference counts, etc. that mediate
   * access to other memory or must be observably consistent with it.
   *
   * x86 architectures don't exploit the optimizations that relaxed
   * ordering permits, so relaxed code may "work" on x86 yet fail elsewhere
   * (ARM, say, which *does* implement non-sequentially-consistent relaxed
   * semantics).  Be extra careful if you can't easily test non-x86
   * architectures!
   */
  Relaxed,

  /*
   * ReleaseAcquire: when a value updated with this ordering is observed
   * with this ordering, the writes made before the update (atomic or not)
   * are observable too.  What it does *not* provide is any observable
   * ordering between ReleaseAcquire operations on *different* objects: if
   * two cores each operate on separate objects, each may or may not
   * observe what the other did.  Cores only synchronize through
   * ReleaseAcquire by accessing the same object, so no global total order
   * of ReleaseAcquire operations can necessarily be described.
   *
   * As the name implies, it suits atomic operations on values that control
   * ownership of things: reference counts, mutexes and the like.  If you
   * are thinking of building your own locks or mutexes with it, though,
   * take a good, hard look at the actual lock and mutex primitives first.
   */
  ReleaseAcquire,

  /*
   * SequentiallyConsistent: as with ReleaseAcquire, all prior writes are
   * observable once the update is observed.  In addition, a global total
   * order of SequentiallyConsistent operations *can* be described: if two
   * cores perform such operations on separate objects, one core observably
   * performs its update (with all its previous operations complete), then
   * the other does.  (Those previous operations are not themselves
   * ordered -- they may be intermixed, or ordered if they occur on atomic
   * values with ordering requirements.)  This is the *simplest and safest*
   * ordering: it is always as if one operation happens, then another, then
   * another, and every core observes the updates in that single order.
   * Having the most synchronization requirements, it also tends to be the
   * slowest.
   *
   * It is desirable when multiple threads observe objects and must all
   * agree on the observable order of changes to them.  People expect this
   * ordering when writing code, atomic or otherwise, even when they
   * shouldn't.  It is also the ordering of choice for lockless data
   * structures.  If you don't know what order to use, use this one.
   */
  SequentiallyConsistent
};
|
|
|
|
} // namespace mozilla
|
|
|
|
// Build up the underlying intrinsics.
|
|
#ifdef MOZ_HAVE_CXX11_ATOMICS
|
|
|
|
# include <atomic>
|
|
|
|
namespace mozilla {
|
|
namespace detail {
|
|
|
|
template<MemoryOrdering Order> struct AtomicOrderConstraints;
|
|
|
|
template<>
|
|
struct AtomicOrderConstraints<Relaxed>
|
|
{
|
|
static const std::memory_order AtomicRMWOrder = std::memory_order_relaxed;
|
|
static const std::memory_order LoadOrder = std::memory_order_relaxed;
|
|
static const std::memory_order StoreOrder = std::memory_order_relaxed;
|
|
};
|
|
|
|
template<>
|
|
struct AtomicOrderConstraints<ReleaseAcquire>
|
|
{
|
|
static const std::memory_order AtomicRMWOrder = std::memory_order_acq_rel;
|
|
static const std::memory_order LoadOrder = std::memory_order_acquire;
|
|
static const std::memory_order StoreOrder = std::memory_order_release;
|
|
};
|
|
|
|
template<>
|
|
struct AtomicOrderConstraints<SequentiallyConsistent>
|
|
{
|
|
static const std::memory_order AtomicRMWOrder = std::memory_order_seq_cst;
|
|
static const std::memory_order LoadOrder = std::memory_order_seq_cst;
|
|
static const std::memory_order StoreOrder = std::memory_order_seq_cst;
|
|
};
|
|
|
|
template<typename T, MemoryOrdering Order>
|
|
struct IntrinsicBase
|
|
{
|
|
typedef std::atomic<T> ValueType;
|
|
typedef AtomicOrderConstraints<Order> OrderedOp;
|
|
};
|
|
|
|
template<typename T, MemoryOrdering Order>
|
|
struct IntrinsicMemoryOps : public IntrinsicBase<T, Order>
|
|
{
|
|
typedef IntrinsicBase<T, Order> Base;
|
|
static T load(const typename Base::ValueType& ptr) {
|
|
return ptr.load(Base::OrderedOp::LoadOrder);
|
|
}
|
|
static void store(typename Base::ValueType& ptr, T val) {
|
|
ptr.store(val, Base::OrderedOp::StoreOrder);
|
|
}
|
|
static T exchange(typename Base::ValueType& ptr, T val) {
|
|
return ptr.exchange(val, Base::OrderedOp::AtomicRMWOrder);
|
|
}
|
|
};
|
|
|
|
template<typename T, MemoryOrdering Order>
|
|
struct IntrinsicAddSub : public IntrinsicBase<T, Order>
|
|
{
|
|
typedef IntrinsicBase<T, Order> Base;
|
|
static T add(typename Base::ValueType& ptr, T val) {
|
|
return ptr.fetch_add(val, Base::OrderedOp::AtomicRMWOrder);
|
|
}
|
|
static T sub(typename Base::ValueType& ptr, T val) {
|
|
return ptr.fetch_sub(val, Base::OrderedOp::AtomicRMWOrder);
|
|
}
|
|
};
|
|
|
|
template<typename T, MemoryOrdering Order>
|
|
struct IntrinsicAddSub<T*, Order> : public IntrinsicBase<T*, Order>
|
|
{
|
|
typedef IntrinsicBase<T*, Order> Base;
|
|
static T* add(typename Base::ValueType& ptr, ptrdiff_t val) {
|
|
return ptr.fetch_add(fixupAddend(val), Base::OrderedOp::AtomicRMWOrder);
|
|
}
|
|
static T* sub(typename Base::ValueType& ptr, ptrdiff_t val) {
|
|
return ptr.fetch_sub(fixupAddend(val), Base::OrderedOp::AtomicRMWOrder);
|
|
}
|
|
private:
|
|
/*
|
|
* GCC 4.6's <atomic> header has a bug where adding X to an
|
|
* atomic<T*> is not the same as adding X to a T*. Hence the need
|
|
* for this function to provide the correct addend.
|
|
*/
|
|
static ptrdiff_t fixupAddend(ptrdiff_t val) {
|
|
#if defined(__clang__) || defined(_MSC_VER)
|
|
return val;
|
|
#elif defined(__GNUC__) && MOZ_GCC_VERSION_AT_LEAST(4, 6, 0) && \
|
|
!MOZ_GCC_VERSION_AT_LEAST(4, 7, 0)
|
|
return val * sizeof(T);
|
|
#else
|
|
return val;
|
|
#endif
|
|
}
|
|
};
|
|
|
|
template<typename T, MemoryOrdering Order>
|
|
struct IntrinsicIncDec : public IntrinsicAddSub<T, Order>
|
|
{
|
|
typedef IntrinsicBase<T, Order> Base;
|
|
static T inc(typename Base::ValueType& ptr) {
|
|
return IntrinsicAddSub<T, Order>::add(ptr, 1);
|
|
}
|
|
static T dec(typename Base::ValueType& ptr) {
|
|
return IntrinsicAddSub<T, Order>::sub(ptr, 1);
|
|
}
|
|
};
|
|
|
|
template<typename T, MemoryOrdering Order>
|
|
struct AtomicIntrinsics : public IntrinsicMemoryOps<T, Order>,
|
|
public IntrinsicIncDec<T, Order>
|
|
{
|
|
typedef IntrinsicBase<T, Order> Base;
|
|
static T or_(typename Base::ValueType& ptr, T val) {
|
|
return ptr.fetch_or(val, Base::OrderedOp::AtomicRMWOrder);
|
|
}
|
|
static T xor_(typename Base::ValueType& ptr, T val) {
|
|
return ptr.fetch_xor(val, Base::OrderedOp::AtomicRMWOrder);
|
|
}
|
|
static T and_(typename Base::ValueType& ptr, T val) {
|
|
return ptr.fetch_and(val, Base::OrderedOp::AtomicRMWOrder);
|
|
}
|
|
};
|
|
|
|
template<typename T, MemoryOrdering Order>
|
|
struct AtomicIntrinsics<T*, Order>
|
|
: public IntrinsicMemoryOps<T*, Order>, public IntrinsicIncDec<T*, Order>
|
|
{
|
|
};
|
|
|
|
} // namespace detail
|
|
} // namespace mozilla
|
|
|
|
#elif defined(__GNUC__)
|
|
|
|
namespace mozilla {
|
|
namespace detail {
|
|
|
|
/*
|
|
* The __sync_* family of intrinsics is documented here:
|
|
*
|
|
* http://gcc.gnu.org/onlinedocs/gcc-4.6.4/gcc/Atomic-Builtins.html
|
|
*
|
|
* While these intrinsics are deprecated in favor of the newer __atomic_*
|
|
* family of intrinsics:
|
|
*
|
|
* http://gcc.gnu.org/onlinedocs/gcc-4.7.3/gcc/_005f_005fatomic-Builtins.html
|
|
*
|
|
* any GCC version that supports the __atomic_* intrinsics will also support
|
|
* the <atomic> header and so will be handled above. We provide a version of
|
|
* atomics using the __sync_* intrinsics to support older versions of GCC.
|
|
*
|
|
* All __sync_* intrinsics that we use below act as full memory
|
|
* barriers, for both compiler and hardware reordering, with one notable
|
|
* exception: __sync_lock_test_and_set. This intrinsic is not a full
|
|
* barrier, but only an acquire barrier. In practice, this has turned
|
|
* out to not matter very much, and will become less important as newer
|
|
* compilers are used.
|
|
*/
|
|
|
|
template<MemoryOrdering Order> struct Barrier;
|
|
|
|
/*
|
|
* Some processors (in particular, x86) don't require quite so many calls to
|
|
* __sync_sychronize as our specializations of Barrier produce. If
|
|
* performance turns out to be an issue, defining these specializations
|
|
* on a per-processor basis would be a good first tuning step.
|
|
*/
|
|
|
|
template<>
|
|
struct Barrier<Relaxed>
|
|
{
|
|
static void beforeLoad() {}
|
|
static void afterLoad() {}
|
|
static void beforeStore() {}
|
|
static void afterStore() {}
|
|
};
|
|
|
|
template<>
|
|
struct Barrier<ReleaseAcquire>
|
|
{
|
|
static void beforeLoad() {}
|
|
static void afterLoad() { __sync_synchronize(); }
|
|
static void beforeStore() { __sync_synchronize(); }
|
|
static void afterStore() {}
|
|
};
|
|
|
|
template<>
|
|
struct Barrier<SequentiallyConsistent>
|
|
{
|
|
static void beforeLoad() { __sync_synchronize(); }
|
|
static void afterLoad() { __sync_synchronize(); }
|
|
static void beforeStore() { __sync_synchronize(); }
|
|
static void afterStore() { __sync_synchronize(); }
|
|
};
|
|
|
|
/*
 * Load, store and exchange for the __sync_*-based implementation.  Loads
 * and stores are plain memory accesses bracketed by the fences that
 * Barrier<Order> prescribes.
 */
template<typename T, MemoryOrdering Order>
struct IntrinsicMemoryOps
{
  // Reads |ptr| between the before/after load barriers for Order.
  static T load(const T& ptr) {
    Barrier<Order>::beforeLoad();
    T val = ptr;
    Barrier<Order>::afterLoad();
    return val;
  }

  // Writes |val| to |ptr| between the before/after store barriers for Order.
  static void store(T& ptr, T val) {
    Barrier<Order>::beforeStore();
    ptr = val;
    Barrier<Order>::afterStore();
  }

  // Atomically writes |val| to |ptr| and returns the previous contents.
  static T exchange(T& ptr, T val) {
    // __sync_lock_test_and_set is only an acquire barrier: later loads and
    // stores cannot move above it, but earlier ones can sink below it.
    // Issue the same barrier a store would get beforehand, so that
    // ReleaseAcquire and SequentiallyConsistent exchanges also publish the
    // writes that precede them.  For Relaxed this is a no-op.
    Barrier<Order>::beforeStore();
    return __sync_lock_test_and_set(&ptr, val);
  }
};
|
|
|
|
/*
 * Atomic fetch-and-add / fetch-and-subtract built on the __sync_*
 * intrinsics.  Both return the value held before the operation.
 */
template<typename T>
struct IntrinsicAddSub
{
  typedef T ValueType;

  static T add(T& ptr, T val)
  {
    T* target = &ptr;
    return __sync_fetch_and_add(target, val);
  }

  static T sub(T& ptr, T val)
  {
    T* target = &ptr;
    return __sync_fetch_and_sub(target, val);
  }
};

/*
 * Pointer flavour.  |val| is an element count.  __sync_fetch_and_{add,sub}
 * do not provide standard pointer-arithmetic semantics, so the count is
 * first multiplied by sizeof(T); the reinterpret_cast exists only so that
 * the intrinsic call type-checks.
 */
template<typename T>
struct IntrinsicAddSub<T*>
{
  typedef T* ValueType;

  static ValueType add(ValueType& ptr, ptrdiff_t val)
  {
    return __sync_fetch_and_add(
        &ptr, reinterpret_cast<ValueType>(val * sizeof(T)));
  }

  static ValueType sub(ValueType& ptr, ptrdiff_t val)
  {
    return __sync_fetch_and_sub(
        &ptr, reinterpret_cast<ValueType>(val * sizeof(T)));
  }
};
|
|
|
|
template<typename T>
|
|
struct IntrinsicIncDec : public IntrinsicAddSub<T>
|
|
{
|
|
static T inc(T& ptr) { return IntrinsicAddSub<T>::add(ptr, 1); }
|
|
static T dec(T& ptr) { return IntrinsicAddSub<T>::sub(ptr, 1); }
|
|
};
|
|
|
|
template<typename T, MemoryOrdering Order>
|
|
struct AtomicIntrinsics : public IntrinsicMemoryOps<T, Order>,
|
|
public IntrinsicIncDec<T>
|
|
{
|
|
static T or_(T& ptr, T val) {
|
|
return __sync_fetch_and_or(&ptr, val);
|
|
}
|
|
static T xor_(T& ptr, T val) {
|
|
return __sync_fetch_and_xor(&ptr, val);
|
|
}
|
|
static T and_(T& ptr, T val) {
|
|
return __sync_fetch_and_and(&ptr, val);
|
|
}
|
|
};
|
|
|
|
template<typename T, MemoryOrdering Order>
|
|
struct AtomicIntrinsics<T*, Order> : public IntrinsicMemoryOps<T*, Order>,
|
|
public IntrinsicIncDec<T*>
|
|
{
|
|
};
|
|
|
|
} // namespace detail
|
|
} // namespace mozilla
|
|
|
|
#elif defined(_MSC_VER)
|
|
|
|
/*
|
|
* Windows comes with a full complement of atomic operations.
|
|
* Unfortunately, most of those aren't available for Windows XP (even if
|
|
* the compiler supports intrinsics for them), which is the oldest
|
|
* version of Windows we support. Therefore, we only provide operations
|
|
* on 32-bit datatypes for 32-bit Windows versions; for 64-bit Windows
|
|
* versions, we support 64-bit datatypes as well.
|
|
*
|
|
* To avoid namespace pollution issues, we declare whatever functions we
|
|
* need ourselves.
|
|
*/
|
|
|
|
extern "C" {
|
|
long __cdecl _InterlockedExchangeAdd(long volatile* dst, long value);
|
|
long __cdecl _InterlockedOr(long volatile* dst, long value);
|
|
long __cdecl _InterlockedXor(long volatile* dst, long value);
|
|
long __cdecl _InterlockedAnd(long volatile* dst, long value);
|
|
long __cdecl _InterlockedExchange(long volatile *dst, long value);
|
|
}
|
|
|
|
# pragma intrinsic(_InterlockedExchangeAdd)
|
|
# pragma intrinsic(_InterlockedOr)
|
|
# pragma intrinsic(_InterlockedXor)
|
|
# pragma intrinsic(_InterlockedAnd)
|
|
# pragma intrinsic(_InterlockedExchange)
|
|
|
|
namespace mozilla {
|
|
namespace detail {
|
|
|
|
#  if !(defined(_M_IX86) || defined(_M_X64))
     /*
      * Everything below is written for x86-like systems.  Porting to
      * Windows on another architecture means revisiting these
      * implementations, so refuse to build rather than guess.
      */
#    error "Unknown CPU type"
#  endif
|
|
|
|
/*
|
|
* This template should define seven functions and |Type|, the datatype upon
|
|
* which the functions operate. These five functions perform the obvious
|
|
* operation on the value contained in |*ptr| combined with |val| and return
|
|
* the value previously stored in |*ptr|
|
|
*
|
|
* static Type add(Type* ptr, Type val);
|
|
* static Type sub(Type* ptr, Type val);
|
|
* static Type or_(Type* ptr, Type val);
|
|
* static Type xor_(Type* ptr, Type val);
|
|
* static Type and_(Type* ptr, Type val);
|
|
*
|
|
* This function atomically stores |val| into |*ptr| and must provide a
|
|
* full memory fence after the store to prevent compiler and hardware
|
|
* instruction reordering. It should also act as a compiler barrier
|
|
* to prevent reads and writes from moving to after the store.
|
|
*
|
|
* static void store(Type* ptr, Type val);
|
|
*
|
|
* This function atomically stores |val| into |*ptr| and returns the
|
|
* previous contents of *ptr;
|
|
*
|
|
* static Type exchange(Type* ptr, Type val);
|
|
*/
|
|
template<size_t DataSize> struct PrimitiveIntrinsics;
|
|
|
|
template<>
|
|
struct PrimitiveIntrinsics<4>
|
|
{
|
|
typedef long Type;
|
|
|
|
static Type add(Type* ptr, Type val) {
|
|
return _InterlockedExchangeAdd(ptr, val);
|
|
}
|
|
static Type sub(Type* ptr, Type val) {
|
|
/*
|
|
* _InterlockedExchangeSubtract isn't available before Windows 7,
|
|
* and we must support Windows XP.
|
|
*/
|
|
return _InterlockedExchangeAdd(ptr, -val);
|
|
}
|
|
static Type or_(Type* ptr, Type val) {
|
|
return _InterlockedOr(ptr, val);
|
|
}
|
|
static Type xor_(Type* ptr, Type val) {
|
|
return _InterlockedXor(ptr, val);
|
|
}
|
|
static Type and_(Type* ptr, Type val) {
|
|
return _InterlockedAnd(ptr, val);
|
|
}
|
|
static void store(Type* ptr, Type val) {
|
|
_InterlockedExchange(ptr, val);
|
|
}
|
|
static Type exchange(Type* ptr, Type val) {
|
|
return _InterlockedExchange(ptr, val);
|
|
}
|
|
};
|
|
|
|
#  if defined(_M_X64)

/*
 * Prototypes for the 64-bit interlocked functions that
 * PrimitiveIntrinsics<8> calls, followed by the pragmas asking the
 * compiler to treat each of them as an intrinsic.  Only x64 builds get
 * these.
 */
extern "C" {
long long __cdecl _InterlockedExchange64(long long volatile* dst,
                                         long long value);
long long __cdecl _InterlockedExchangeAdd64(long long volatile* dst,
                                            long long value);
long long __cdecl _InterlockedAnd64(long long volatile* dst,
                                    long long value);
long long __cdecl _InterlockedOr64(long long volatile* dst,
                                   long long value);
long long __cdecl _InterlockedXor64(long long volatile* dst,
                                    long long value);
}

#    pragma intrinsic(_InterlockedExchange64)
#    pragma intrinsic(_InterlockedExchangeAdd64)
#    pragma intrinsic(_InterlockedAnd64)
#    pragma intrinsic(_InterlockedOr64)
#    pragma intrinsic(_InterlockedXor64)

/* Operations on 8-byte values, implemented with the 64-bit intrinsics. */
template <>
struct PrimitiveIntrinsics<8>
{
  typedef __int64 Type;

  static Type add(Type* ptr, Type val)
  {
    return _InterlockedExchangeAdd64(ptr, val);
  }

  static Type sub(Type* ptr, Type val)
  {
    // There is no _InterlockedExchangeSubtract64, so add the negation.
    const Type negated = -val;
    return _InterlockedExchangeAdd64(ptr, negated);
  }

  static Type or_(Type* ptr, Type val)
  {
    return _InterlockedOr64(ptr, val);
  }

  static Type xor_(Type* ptr, Type val)
  {
    return _InterlockedXor64(ptr, val);
  }

  static Type and_(Type* ptr, Type val)
  {
    return _InterlockedAnd64(ptr, val);
  }

  // A store is an exchange whose result is discarded.
  static void store(Type* ptr, Type val)
  {
    _InterlockedExchange64(ptr, val);
  }

  static Type exchange(Type* ptr, Type val)
  {
    return _InterlockedExchange64(ptr, val);
  }
};

#  endif
|
|
|
|
extern "C" { void _ReadWriteBarrier(); }
|
|
|
|
# pragma intrinsic(_ReadWriteBarrier)
|
|
|
|
template<MemoryOrdering Order> struct Barrier;
|
|
|
|
/*
|
|
* We do not provide an afterStore method in Barrier, as Relaxed and
|
|
* ReleaseAcquire orderings do not require one, and the required barrier
|
|
* for SequentiallyConsistent is handled by PrimitiveIntrinsics.
|
|
*/
|
|
|
|
template<>
|
|
struct Barrier<Relaxed>
|
|
{
|
|
static void beforeLoad() {}
|
|
static void afterLoad() {}
|
|
static void beforeStore() {}
|
|
};
|
|
|
|
template<>
|
|
struct Barrier<ReleaseAcquire>
|
|
{
|
|
static void beforeLoad() {}
|
|
static void afterLoad() { _ReadWriteBarrier(); }
|
|
static void beforeStore() { _ReadWriteBarrier(); }
|
|
};
|
|
|
|
template<>
|
|
struct Barrier<SequentiallyConsistent>
|
|
{
|
|
static void beforeLoad() { _ReadWriteBarrier(); }
|
|
static void afterLoad() { _ReadWriteBarrier(); }
|
|
static void beforeStore() { _ReadWriteBarrier(); }
|
|
};
|
|
|
|
/*
 * Converts between a user-visible type T and the primitive type the
 * intrinsics operate on.  Non-pointer types use static_cast.
 */
template<typename PrimType, typename T>
struct CastHelper
{
  static PrimType toPrimType(T val)
  {
    const PrimType converted = static_cast<PrimType>(val);
    return converted;
  }

  static T fromPrimType(PrimType val)
  {
    const T converted = static_cast<T>(val);
    return converted;
  }
};

/* Pointer types need reinterpret_cast in both directions. */
template<typename PrimType, typename T>
struct CastHelper<PrimType, T*>
{
  static PrimType toPrimType(T* val)
  {
    const PrimType converted = reinterpret_cast<PrimType>(val);
    return converted;
  }

  static T* fromPrimType(PrimType val)
  {
    T* const converted = reinterpret_cast<T*>(val);
    return converted;
  }
};
|
|
|
|
template<typename T>
|
|
struct IntrinsicBase
|
|
{
|
|
typedef T ValueType;
|
|
typedef PrimitiveIntrinsics<sizeof(T)> Primitives;
|
|
typedef typename Primitives::Type PrimType;
|
|
MOZ_STATIC_ASSERT(sizeof(PrimType) == sizeof(T),
|
|
"Selection of PrimitiveIntrinsics was wrong");
|
|
typedef CastHelper<PrimType, T> Cast;
|
|
};
|
|
|
|
template<typename T, MemoryOrdering Order>
|
|
struct IntrinsicMemoryOps : public IntrinsicBase<T>
|
|
{
|
|
static ValueType load(const ValueType& ptr) {
|
|
Barrier<Order>::beforeLoad();
|
|
ValueType val = ptr;
|
|
Barrier<Order>::afterLoad();
|
|
return val;
|
|
}
|
|
static void store(ValueType& ptr, ValueType val) {
|
|
// For SequentiallyConsistent, Primitives::store() will generate the
|
|
// proper memory fence. Everything else just needs a barrier before
|
|
// the store.
|
|
if (Order == SequentiallyConsistent) {
|
|
Primitives::store(reinterpret_cast<PrimType*>(&ptr),
|
|
Cast::toPrimType(val));
|
|
} else {
|
|
Barrier<Order>::beforeStore();
|
|
ptr = val;
|
|
}
|
|
}
|
|
static ValueType exchange(ValueType& ptr, ValueType val) {
|
|
PrimType oldval =
|
|
Primitives::exchange(reinterpret_cast<PrimType*>(&ptr),
|
|
Cast::toPrimType(val));
|
|
return Cast::fromPrimType(oldval);
|
|
}
|
|
};
|
|
|
|
template<typename T>
|
|
struct IntrinsicApplyHelper : public IntrinsicBase<T>
|
|
{
|
|
typedef PrimType (*BinaryOp)(PrimType*, PrimType);
|
|
typedef PrimType (*UnaryOp)(PrimType*);
|
|
|
|
static ValueType applyBinaryFunction(BinaryOp op, ValueType& ptr,
|
|
ValueType val) {
|
|
PrimType* primTypePtr = reinterpret_cast<PrimType*>(&ptr);
|
|
PrimType primTypeVal = Cast::toPrimType(val);
|
|
return Cast::fromPrimType(op(primTypePtr, primTypeVal));
|
|
}
|
|
|
|
static ValueType applyUnaryFunction(UnaryOp op, ValueType& ptr) {
|
|
PrimType* primTypePtr = reinterpret_cast<PrimType*>(&ptr);
|
|
return Cast::fromPrimType(op(primTypePtr));
|
|
}
|
|
};
|
|
|
|
template<typename T>
|
|
struct IntrinsicAddSub : public IntrinsicApplyHelper<T>
|
|
{
|
|
static ValueType add(ValueType& ptr, ValueType val) {
|
|
return applyBinaryFunction(&Primitives::add, ptr, val);
|
|
}
|
|
static ValueType sub(ValueType& ptr, ValueType val) {
|
|
return applyBinaryFunction(&Primitives::sub, ptr, val);
|
|
}
|
|
};
|
|
|
|
template<typename T>
|
|
struct IntrinsicAddSub<T*> : public IntrinsicApplyHelper<T*>
|
|
{
|
|
static ValueType add(ValueType& ptr, ptrdiff_t amount) {
|
|
return applyBinaryFunction(&Primitives::add, ptr,
|
|
(ValueType)(amount * sizeof(ValueType)));
|
|
}
|
|
static ValueType sub(ValueType& ptr, ptrdiff_t amount) {
|
|
return applyBinaryFunction(&Primitives::sub, ptr,
|
|
(ValueType)(amount * sizeof(ValueType)));
|
|
}
|
|
};
|
|
|
|
template<typename T>
|
|
struct IntrinsicIncDec : public IntrinsicAddSub<T>
|
|
{
|
|
static ValueType inc(ValueType& ptr) { return add(ptr, 1); }
|
|
static ValueType dec(ValueType& ptr) { return sub(ptr, 1); }
|
|
};
|
|
|
|
template<typename T, MemoryOrdering Order>
|
|
struct AtomicIntrinsics : public IntrinsicMemoryOps<T, Order>,
|
|
public IntrinsicIncDec<T>
|
|
{
|
|
static ValueType or_(ValueType& ptr, T val) {
|
|
return applyBinaryFunction(&Primitives::or_, ptr, val);
|
|
}
|
|
static ValueType xor_(ValueType& ptr, T val) {
|
|
return applyBinaryFunction(&Primitives::xor_, ptr, val);
|
|
}
|
|
static ValueType and_(ValueType& ptr, T val) {
|
|
return applyBinaryFunction(&Primitives::and_, ptr, val);
|
|
}
|
|
};
|
|
|
|
template<typename T, MemoryOrdering Order>
|
|
struct AtomicIntrinsics<T*, Order> : public IntrinsicMemoryOps<T*, Order>,
|
|
public IntrinsicIncDec<T*>
|
|
{
|
|
};
|
|
|
|
} // namespace detail
|
|
} // namespace mozilla
|
|
|
|
#else
|
|
# error "Atomic compiler intrinsics are not supported on your platform"
|
|
#endif
|
|
|
|
namespace mozilla {
|
|
|
|
namespace detail {
|
|
|
|
template<typename T, MemoryOrdering Order>
|
|
class AtomicBase
|
|
{
|
|
protected:
|
|
typedef typename detail::AtomicIntrinsics<T, Order> Intrinsics;
|
|
typename Intrinsics::ValueType mValue;
|
|
|
|
public:
|
|
AtomicBase() : mValue() {}
|
|
AtomicBase(T aInit) { Intrinsics::store(mValue, aInit); }
|
|
|
|
T operator++(int) { return Intrinsics::inc(mValue); }
|
|
T operator--(int) { return Intrinsics::dec(mValue); }
|
|
T operator++() { return Intrinsics::inc(mValue) + 1; }
|
|
T operator--() { return Intrinsics::dec(mValue) - 1; }
|
|
|
|
operator T() const { return Intrinsics::load(mValue); }
|
|
|
|
/**
|
|
* Performs an atomic swap operation. aValue is stored and the previous
|
|
* value of this variable is returned.
|
|
*/
|
|
T exchange(T aValue) {
|
|
return Intrinsics::exchange(mValue, aValue);
|
|
}
|
|
|
|
private:
|
|
template<MemoryOrdering AnyOrder>
|
|
AtomicBase(const AtomicBase<T, AnyOrder>& aCopy) MOZ_DELETE;
|
|
};
|
|
|
|
} // namespace detail
|
|
|
|
/**
|
|
* A wrapper for a type that enforces that all memory accesses are atomic.
|
|
*
|
|
* In general, where a variable |T foo| exists, |Atomic<T> foo| can be
|
|
* used in its place. In addition to atomic store and load operations,
|
|
* compound assignment and increment/decrement operators are implemented
|
|
* which perform the corresponding read-modify-write operation
|
|
* atomically. Finally, an atomic swap method is provided.
|
|
*
|
|
* Atomic accesses are sequentially consistent by default. You should
|
|
* use the default unless you are tall enough to ride the
|
|
* memory-ordering roller coaster (if you're not sure, you aren't) and
|
|
* you have a compelling reason to do otherwise.
|
|
*
|
|
* There is one exception to the case of atomic memory accesses: providing an
|
|
* initial value of the atomic value is not guaranteed to be atomic. This is a
|
|
* deliberate design choice that enables static atomic variables to be declared
|
|
* without introducing extra static constructors.
|
|
*/
|
|
template<typename T, MemoryOrdering Order = SequentiallyConsistent>
|
|
class Atomic : public detail::AtomicBase<T, Order>
|
|
{
|
|
// We only support 32-bit types on 32-bit Windows, which constrains our
|
|
// implementation elsewhere. But we support pointer-sized types everywhere.
|
|
MOZ_STATIC_ASSERT(sizeof(T) == 4 || (sizeof(uintptr_t) == 8 && sizeof(T) == 8),
|
|
"mozilla/Atomics.h only supports 32-bit and pointer-sized types");
|
|
// Regardless of the OS, we only support integral types here.
|
|
MOZ_STATIC_ASSERT(IsIntegral<T>::value, "can only have integral atomic variables");
|
|
|
|
typedef typename detail::AtomicBase<T, Order> Base;
|
|
|
|
public:
|
|
Atomic() : detail::AtomicBase<T, Order>() {}
|
|
Atomic(T aInit) : detail::AtomicBase<T, Order>(aInit) {}
|
|
|
|
T operator+=(T delta) { return Base::Intrinsics::add(Base::mValue, delta) + delta; }
|
|
T operator-=(T delta) { return Base::Intrinsics::sub(Base::mValue, delta) - delta; }
|
|
T operator|=(T val) { return Base::Intrinsics::or_(Base::mValue, val) | val; }
|
|
T operator^=(T val) { return Base::Intrinsics::xor_(Base::mValue, val) ^ val; }
|
|
T operator&=(T val) { return Base::Intrinsics::and_(Base::mValue, val) & val; }
|
|
|
|
T operator=(T aValue) {
|
|
Base::Intrinsics::store(Base::mValue, aValue);
|
|
return aValue;
|
|
}
|
|
|
|
private:
|
|
Atomic(Atomic<T, Order>& aOther) MOZ_DELETE;
|
|
};
|
|
|
|
/**
|
|
* A partial specialization of Atomic for pointer variables.
|
|
*
|
|
* Like Atomic<T>, Atomic<T*> is equivalent in most respects to a regular T*
|
|
* variable. Atomic increment and decrement operators are provided (this
* class does not define a compare-and-swap primitive). Also provided
|
|
* are the compound assignment operators for addition and subtraction.
|
|
* Atomic swap (via exchange()) is included as well.
|
|
*
|
|
* Atomic accesses are sequentially consistent by default. You should
|
|
* use the default unless you are tall enough to ride the
|
|
* memory-ordering roller coaster (if you're not sure, you aren't) and
|
|
* you have a compelling reason to do otherwise.
|
|
*
|
|
* There is one exception to the case of atomic memory accesses: providing an
|
|
* initial value of the atomic value is not guaranteed to be atomic. This is a
|
|
* deliberate design choice that enables static atomic variables to be declared
|
|
* without introducing extra static constructors.
|
|
*/
|
|
template<typename T, MemoryOrdering Order>
|
|
class Atomic<T*, Order> : public detail::AtomicBase<T*, Order>
|
|
{
|
|
typedef typename detail::AtomicBase<T*, Order> Base;
|
|
|
|
public:
|
|
Atomic() : detail::AtomicBase<T*, Order>() {}
|
|
Atomic(T* aInit) : detail::AtomicBase<T*, Order>(aInit) {}
|
|
|
|
T* operator +=(ptrdiff_t delta) {
|
|
return Base::Intrinsics::add(Base::mValue, delta) + delta;
|
|
}
|
|
T* operator -=(ptrdiff_t delta) {
|
|
return Base::Intrinsics::sub(Base::mValue, delta) - delta;
|
|
}
|
|
|
|
T* operator=(T* aValue) {
|
|
Base::Intrinsics::store(Base::mValue, aValue);
|
|
return aValue;
|
|
}
|
|
|
|
private:
|
|
Atomic(Atomic<T*, Order>& aOther) MOZ_DELETE;
|
|
};
|
|
|
|
} // namespace mozilla
|
|
|
|
#endif /* mozilla_Atomics_h_ */
|