darling-WTF/wtf/CountingLock.h

/*
 * Copyright (C) 2017 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 */

#pragma once

#include <wtf/DataLog.h>
#include <wtf/LockAlgorithm.h>

namespace WTF {

// This is mostly just a word-sized WTF::Lock. It supports basically everything that lock supports. But as
// a bonus, it atomically counts lock() calls and allows you to perform an optimistic read transaction by
// comparing the count before and after the transaction. If at the start of the transaction the lock is
// not held and the count remains the same throughout the transaction, then you know that nobody could
// have modified your data structure while you ran. You can even use this to optimistically read pointers
// that could become dangling under concurrent writes, if you just revalidate the count every time you're
// about to do something dangerous.
//
// This is largely inspired by StampedLock from Java:
// https://docs.oracle.com/javase/8/docs/api/java/util/concurrent/locks/CountingLock.html
//
// This is simplified a lot compared to StampedLock. Unlike StampedLock, it uses an exclusive lock as a
// fallback. There is no way to acquire a CountingLock for read. The only read access is via optimistic
// read transactions.
//
// CountingLock provides two ways of doing optimistic reads:
//
// - The easy way, where CountingLock does all of the fencing for you. That fencing is free on x86 but
//   somewhat expensive on ARM.
// - The hard way, where you do fencing yourself using Dependency. This allows you to be fenceless on both
//   x86 and ARM.
//
// The latter is important for us because some GC paths are known to be sensitive to fences on ARM.

class CountingLock final {
    WTF_MAKE_NONCOPYABLE(CountingLock);
    WTF_MAKE_FAST_ALLOCATED;

    typedef unsigned LockType;
    
    static constexpr LockType isHeldBit = 1;
    static constexpr LockType hasParkedBit = 2;
    static constexpr LockType mask = isHeldBit | hasParkedBit;
    static constexpr LockType shift = 2;
    static constexpr LockType countUnit = 4;
    
    struct LockHooks {
        static LockType lockHook(LockType value)
        {
            return value + countUnit;
        }
        
        static LockType unlockHook(LockType value) { return value; }
        static LockType parkHook(LockType value) { return value; }
        static LockType handoffHook(LockType value) { return value; }
    };
    
    typedef LockAlgorithm<LockType, isHeldBit, hasParkedBit, LockHooks> ExclusiveAlgorithm;
    
public:
    CountingLock() = default;
    
    bool tryLock()
    {
        return ExclusiveAlgorithm::tryLock(m_word);
    }

    void lock()
    {
        if (UNLIKELY(!ExclusiveAlgorithm::lockFast(m_word)))
            lockSlow();
    }
    
    void unlock()
    {
        if (UNLIKELY(!ExclusiveAlgorithm::unlockFast(m_word)))
            unlockSlow();
    }
    
    bool isHeld() const
    {
        return ExclusiveAlgorithm::isLocked(m_word);
    }
    
    bool isLocked() const
    {
        return isHeld();
    }
    
    // The only thing you're allowed to infer from this value is that if it's zero, then you didn't get
    // a real count.
    class Count {
    public:
        explicit operator bool() const { return !!m_value; }
        
        bool operator==(const Count& other) const { return m_value == other.m_value; }
        bool operator!=(const Count& other) const { return m_value != other.m_value; }
        
    private:
        friend class CountingLock;
        
        LockType m_value { 0 };
    };
    
    // Example of how to use this:
    //
    //     int read()
    //     {
    //         if (CountingLock::Count count = m_lock.tryOptimisticRead()) {
    //             int value = m_things;
    //             if (m_lock.validate(count))
    //                 return value; // success!
    //         }
    //         auto locker = holdLock(m_lock);
    //         int value = m_things;
    //         return value;
    //     }
    //
    // If tryOptimisitcRead() runs when the lock is not held, this thread will run a critical section
    // without ever writing to memory. However, on ARM, this requires fencing. We use a load-acquire for
    // tryOptimisticRead(). We have no choice but to use the more expensive `dmb ish` in validate(). If
    // you want to avoid that, you could try to use tryOptimisticFencelessRead().
    Count tryOptimisticRead()
    {
        LockType currentValue = m_word.load();
        // FIXME: We could eliminate this check, if we think it's OK to proceed with the optimistic read
        // path even after knowing that it must fail. That's probably good for perf since we expect
        // failure to be super rare. We would get rid of this check and instead of calling getCount below,
        // we would return currentValue ^ mask. If the lock state was empty to begin with, the result
        // would be a properly blessed count (both low bits set). If the lock state was anything else, we
        // would get an improperly blessed count that would not possibly succeed in validate. We could
        // actually do something like "return (currentValue | hasParkedBit) ^ isHeldBit", which would mean
        // that we allow parked-but-not-held-locks through.
        // https://bugs.webkit.org/show_bug.cgi?id=180394
        if (currentValue & isHeldBit)
            return Count();
        return getCount(currentValue);
    }
    
    bool validate(Count count)
    {
        WTF::loadLoadFence();
        LockType currentValue = m_word.loadRelaxed();
        return getCount(currentValue) == count;
    }
    
    // Example of how to use this:
    //
    //     int read()
    //     {
    //         return m_lock.doOptimizedRead(
    //             [&] () -> int {
    //                 int value = m_things;
    //                 return value;
    //             });
    //     }
    template<typename Func>
    auto doOptimizedRead(const Func& func)
    {
        Count count = tryOptimisticRead();
        if (count) {
            auto result = func();
            if (validate(count))
                return result;
        }
        lock();
        auto result = func();
        unlock();
        return result;
    }
    
    // Example of how to use this:
    //
    //     int read()
    //     {
    //         auto result = m_lock.tryOptimisticFencelessRead();
    //         if (CountingLock::Count count = result.value) {
    //             Dependency fenceBefore = Dependency::fence(result.input);
    //             auto* fencedThis = fenceBefore.consume(this);
    //             int value = fencedThis->m_things;
    //             if (m_lock.fencelessValidate(count, Dependency::fence(value)))
    //                 return value; // success!
    //         }
    //         auto locker = holdLock(m_lock);
    //         int value = m_things;
    //         return value;
    //     }
    //
    // Use this to create a read transaction using dependency chains only. You have to be careful to
    // thread the dependency input (the `input` field that the returns) through a Dependency, and then
    // thread that Dependency into every load (except for loads that are chasing pointers loaded from
    // loads that already uses that dependency). Then, to validate the read transaction, you have to pass
    // both the count and another Dependency that is based on whatever loads you used to produce the
    // output.
    //
    // On non-ARM platforms, the Dependency objects don't do anything except for Dependency::fence, which
    // is a load-load fence. The idiom above does the right thing on both ARM and TSO.
    //
    // WARNING: This can be hard to get right. Please only use this for very short critical sections that
    // are known to be sufficiently perf-critical to justify the risk.
    InputAndValue<LockType, Count> tryOptimisticFencelessRead()
    {
        LockType currentValue = m_word.loadRelaxed();
        if (currentValue & isHeldBit)
            return inputAndValue(currentValue, Count());
        return inputAndValue(currentValue, getCount(currentValue));
    }
    
    bool fencelessValidate(Count count, Dependency dependency)
    {
        LockType currentValue = dependency.consume(this)->m_word.loadRelaxed();
        return getCount(currentValue) == count;
    }
    
    template<typename OptimisticFunc, typename Func>
    auto doOptimizedFencelessRead(const OptimisticFunc& optimisticFunc, const Func& func)
    {
        auto count = tryOptimisticFencelessRead();
        if (count.value) {
            Dependency dependency = Dependency::fence(count.input);
            auto result = optimisticFunc(dependency, count.value);
            if (fencelessValidate(count.value, dependency))
                return result;
        }
        lock();
        auto result = func();
        unlock();
        return result;
    }
    
private:
    WTF_EXPORT_PRIVATE void lockSlow();
    WTF_EXPORT_PRIVATE void unlockSlow();
    
    Count getCount(LockType value)
    {
        Count result;
        result.m_value = value | mask;
        return result;
    }
    
    Atomic<LockType> m_word { 0 };
};

} // namespace WTF

using WTF::CountingLock;
WTF-7608.4.9.1.3 2020-08-28 19:00:43 +00:00			`/*`
			`* Copyright (C) 2017 Apple Inc. All rights reserved.`
			`*`
			`* Redistribution and use in source and binary forms, with or without`
			`* modification, are permitted provided that the following conditions`
			`* are met:`
			`* 1. Redistributions of source code must retain the above copyright`
			`* notice, this list of conditions and the following disclaimer.`
			`* 2. Redistributions in binary form must reproduce the above copyright`
			`* notice, this list of conditions and the following disclaimer in the`
			`* documentation and/or other materials provided with the distribution.`
			`*`
			* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
			`* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE`
			`* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR`
			`* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR`
			`* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,`
			`* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,`
			`* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR`
			`* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY`
			`* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT`
			`* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE`
			`* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.`
			`*/`

			`#pragma once`

			`#include <wtf/DataLog.h>`
			`#include <wtf/LockAlgorithm.h>`

			`namespace WTF {`

			`// This is mostly just a word-sized WTF::Lock. It supports basically everything that lock supports. But as`
			`// a bonus, it atomically counts lock() calls and allows you to perform an optimistic read transaction by`
			`// comparing the count before and after the transaction. If at the start of the transaction the lock is`
			`// not held and the count remains the same throughout the transaction, then you know that nobody could`
			`// have modified your data structure while you ran. You can even use this to optimistically read pointers`
			`// that could become dangling under concurrent writes, if you just revalidate the count every time you're`
			`// about to do something dangerous.`
			`//`
			`// This is largely inspired by StampedLock from Java:`
			`// https://docs.oracle.com/javase/8/docs/api/java/util/concurrent/locks/CountingLock.html`
			`//`
			`// This is simplified a lot compared to StampedLock. Unlike StampedLock, it uses an exclusive lock as a`
			`// fallback. There is no way to acquire a CountingLock for read. The only read access is via optimistic`
			`// read transactions.`
			`//`
			`// CountingLock provides two ways of doing optimistic reads:`
			`//`
			`// - The easy way, where CountingLock does all of the fencing for you. That fencing is free on x86 but`
			`// somewhat expensive on ARM.`
			`// - The hard way, where you do fencing yourself using Dependency. This allows you to be fenceless on both`
			`// x86 and ARM.`
			`//`
			`// The latter is important for us because some GC paths are known to be sensitive to fences on ARM.`

Update Source To WTF-7611.3.10.1.3 2022-10-19 05:04:30 +00:00			`class CountingLock final {`
WTF-7608.4.9.1.3 2020-08-28 19:00:43 +00:00			`WTF_MAKE_NONCOPYABLE(CountingLock);`
			`WTF_MAKE_FAST_ALLOCATED;`

			`typedef unsigned LockType;`

			`static constexpr LockType isHeldBit = 1;`
			`static constexpr LockType hasParkedBit = 2;`
			`static constexpr LockType mask = isHeldBit \| hasParkedBit;`
			`static constexpr LockType shift = 2;`
			`static constexpr LockType countUnit = 4;`

			`struct LockHooks {`
			`static LockType lockHook(LockType value)`
			`{`
			`return value + countUnit;`
			`}`

			`static LockType unlockHook(LockType value) { return value; }`
			`static LockType parkHook(LockType value) { return value; }`
			`static LockType handoffHook(LockType value) { return value; }`
			`};`

			`typedef LockAlgorithm<LockType, isHeldBit, hasParkedBit, LockHooks> ExclusiveAlgorithm;`

			`public:`
			`CountingLock() = default;`

			`bool tryLock()`
			`{`
			`return ExclusiveAlgorithm::tryLock(m_word);`
			`}`

			`void lock()`
			`{`
			`if (UNLIKELY(!ExclusiveAlgorithm::lockFast(m_word)))`
			`lockSlow();`
			`}`

			`void unlock()`
			`{`
			`if (UNLIKELY(!ExclusiveAlgorithm::unlockFast(m_word)))`
			`unlockSlow();`
			`}`

			`bool isHeld() const`
			`{`
			`return ExclusiveAlgorithm::isLocked(m_word);`
			`}`

			`bool isLocked() const`
			`{`
			`return isHeld();`
			`}`

			`// The only thing you're allowed to infer from this value is that if it's zero, then you didn't get`
			`// a real count.`
			`class Count {`
			`public:`
			`explicit operator bool() const { return !!m_value; }`

			`bool operator==(const Count& other) const { return m_value == other.m_value; }`
			`bool operator!=(const Count& other) const { return m_value != other.m_value; }`

			`private:`
			`friend class CountingLock;`

			`LockType m_value { 0 };`
			`};`

			`// Example of how to use this:`
			`//`
			`// int read()`
			`// {`
			`// if (CountingLock::Count count = m_lock.tryOptimisticRead()) {`
			`// int value = m_things;`
			`// if (m_lock.validate(count))`
			`// return value; // success!`
			`// }`
			`// auto locker = holdLock(m_lock);`
			`// int value = m_things;`
			`// return value;`
			`// }`
			`//`
			`// If tryOptimisitcRead() runs when the lock is not held, this thread will run a critical section`
			`// without ever writing to memory. However, on ARM, this requires fencing. We use a load-acquire for`
			// tryOptimisticRead(). We have no choice but to use the more expensive `dmb ish` in validate(). If
			`// you want to avoid that, you could try to use tryOptimisticFencelessRead().`
			`Count tryOptimisticRead()`
			`{`
			`LockType currentValue = m_word.load();`
			`// FIXME: We could eliminate this check, if we think it's OK to proceed with the optimistic read`
			`// path even after knowing that it must fail. That's probably good for perf since we expect`
			`// failure to be super rare. We would get rid of this check and instead of calling getCount below,`
			`// we would return currentValue ^ mask. If the lock state was empty to begin with, the result`
			`// would be a properly blessed count (both low bits set). If the lock state was anything else, we`
			`// would get an improperly blessed count that would not possibly succeed in validate. We could`
			`// actually do something like "return (currentValue \| hasParkedBit) ^ isHeldBit", which would mean`
			`// that we allow parked-but-not-held-locks through.`
			`// https://bugs.webkit.org/show_bug.cgi?id=180394`
			`if (currentValue & isHeldBit)`
			`return Count();`
			`return getCount(currentValue);`
			`}`

			`bool validate(Count count)`
			`{`
			`WTF::loadLoadFence();`
			`LockType currentValue = m_word.loadRelaxed();`
			`return getCount(currentValue) == count;`
			`}`

			`// Example of how to use this:`
			`//`
			`// int read()`
			`// {`
			`// return m_lock.doOptimizedRead(`
			`// [&] () -> int {`
			`// int value = m_things;`
			`// return value;`
			`// });`
			`// }`
			`template<typename Func>`
			`auto doOptimizedRead(const Func& func)`
			`{`
			`Count count = tryOptimisticRead();`
			`if (count) {`
			`auto result = func();`
			`if (validate(count))`
			`return result;`
			`}`
			`lock();`
			`auto result = func();`
			`unlock();`
			`return result;`
			`}`

			`// Example of how to use this:`
			`//`
			`// int read()`
			`// {`
			`// auto result = m_lock.tryOptimisticFencelessRead();`
			`// if (CountingLock::Count count = result.value) {`
			`// Dependency fenceBefore = Dependency::fence(result.input);`
			`// auto* fencedThis = fenceBefore.consume(this);`
			`// int value = fencedThis->m_things;`
			`// if (m_lock.fencelessValidate(count, Dependency::fence(value)))`
			`// return value; // success!`
			`// }`
			`// auto locker = holdLock(m_lock);`
			`// int value = m_things;`
			`// return value;`
			`// }`
			`//`
			`// Use this to create a read transaction using dependency chains only. You have to be careful to`
			// thread the dependency input (the `input` field that the returns) through a Dependency, and then
			`// thread that Dependency into every load (except for loads that are chasing pointers loaded from`
			`// loads that already uses that dependency). Then, to validate the read transaction, you have to pass`
			`// both the count and another Dependency that is based on whatever loads you used to produce the`
			`// output.`
			`//`
			`// On non-ARM platforms, the Dependency objects don't do anything except for Dependency::fence, which`
			`// is a load-load fence. The idiom above does the right thing on both ARM and TSO.`
			`//`
			`// WARNING: This can be hard to get right. Please only use this for very short critical sections that`
			`// are known to be sufficiently perf-critical to justify the risk.`
			`InputAndValue<LockType, Count> tryOptimisticFencelessRead()`
			`{`
			`LockType currentValue = m_word.loadRelaxed();`
			`if (currentValue & isHeldBit)`
			`return inputAndValue(currentValue, Count());`
			`return inputAndValue(currentValue, getCount(currentValue));`
			`}`

			`bool fencelessValidate(Count count, Dependency dependency)`
			`{`
			`LockType currentValue = dependency.consume(this)->m_word.loadRelaxed();`
			`return getCount(currentValue) == count;`
			`}`

			`template<typename OptimisticFunc, typename Func>`
			`auto doOptimizedFencelessRead(const OptimisticFunc& optimisticFunc, const Func& func)`
			`{`
			`auto count = tryOptimisticFencelessRead();`
			`if (count.value) {`
			`Dependency dependency = Dependency::fence(count.input);`
			`auto result = optimisticFunc(dependency, count.value);`
			`if (fencelessValidate(count.value, dependency))`
			`return result;`
			`}`
			`lock();`
			`auto result = func();`
			`unlock();`
			`return result;`
			`}`

			`private:`
			`WTF_EXPORT_PRIVATE void lockSlow();`
			`WTF_EXPORT_PRIVATE void unlockSlow();`

			`Count getCount(LockType value)`
			`{`
			`Count result;`
			`result.m_value = value \| mask;`
			`return result;`
			`}`

			`Atomic<LockType> m_word { 0 };`
			`};`

			`} // namespace WTF`

			`using WTF::CountingLock;`