mirror of
https://github.com/FEX-Emu/linux.git
synced 2024-12-23 01:40:30 +00:00
Merge branch 'locking/core' into x86/core, to prepare for dependent patch
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
commit
71966f3a0b
@ -1662,7 +1662,7 @@ CPU from reordering them.
|
||||
|
||||
There are some more advanced barrier functions:
|
||||
|
||||
(*) set_mb(var, value)
|
||||
(*) smp_store_mb(var, value)
|
||||
|
||||
This assigns the value to the variable and then inserts a full memory
|
||||
barrier after it, depending on the function. It isn't guaranteed to
|
||||
@ -1975,7 +1975,7 @@ after it has altered the task state:
|
||||
CPU 1
|
||||
===============================
|
||||
set_current_state();
|
||||
set_mb();
|
||||
smp_store_mb();
|
||||
STORE current->state
|
||||
<general barrier>
|
||||
LOAD event_indicated
|
||||
@ -2016,7 +2016,7 @@ between the STORE to indicate the event and the STORE to set TASK_RUNNING:
|
||||
CPU 1 CPU 2
|
||||
=============================== ===============================
|
||||
set_current_state(); STORE event_indicated
|
||||
set_mb(); wake_up();
|
||||
smp_store_mb(); wake_up();
|
||||
STORE current->state <write barrier>
|
||||
<general barrier> STORE current->state
|
||||
LOAD event_indicated
|
||||
|
@ -66,6 +66,4 @@
|
||||
#undef __ASM__MB
|
||||
#undef ____cmpxchg
|
||||
|
||||
#define __HAVE_ARCH_CMPXCHG 1
|
||||
|
||||
#endif /* _ALPHA_CMPXCHG_H */
|
||||
|
@ -81,7 +81,7 @@ do { \
|
||||
#define read_barrier_depends() do { } while(0)
|
||||
#define smp_read_barrier_depends() do { } while(0)
|
||||
|
||||
#define set_mb(var, value) do { var = value; smp_mb(); } while (0)
|
||||
#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0)
|
||||
|
||||
#define smp_mb__before_atomic() smp_mb()
|
||||
#define smp_mb__after_atomic() smp_mb()
|
||||
|
@ -114,7 +114,7 @@ do { \
|
||||
#define read_barrier_depends() do { } while(0)
|
||||
#define smp_read_barrier_depends() do { } while(0)
|
||||
|
||||
#define set_mb(var, value) do { var = value; smp_mb(); } while (0)
|
||||
#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0)
|
||||
#define nop() asm volatile("nop");
|
||||
|
||||
#define smp_mb__before_atomic() smp_mb()
|
||||
|
@ -70,8 +70,6 @@ extern unsigned long __cmpxchg_u64_unsupported_on_32bit_kernels(
|
||||
if something tries to do an invalid cmpxchg(). */
|
||||
extern void __cmpxchg_called_with_bad_pointer(void);
|
||||
|
||||
#define __HAVE_ARCH_CMPXCHG 1
|
||||
|
||||
static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
|
||||
unsigned long new, int size)
|
||||
{
|
||||
|
@ -64,7 +64,6 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr,
|
||||
* looks just like atomic_cmpxchg on our arch currently with a bunch of
|
||||
* variable casting.
|
||||
*/
|
||||
#define __HAVE_ARCH_CMPXCHG 1
|
||||
|
||||
#define cmpxchg(ptr, old, new) \
|
||||
({ \
|
||||
|
@ -77,12 +77,7 @@ do { \
|
||||
___p1; \
|
||||
})
|
||||
|
||||
/*
|
||||
* XXX check on this ---I suspect what Linus really wants here is
|
||||
* acquire vs release semantics but we can't discuss this stuff with
|
||||
* Linus just yet. Grrr...
|
||||
*/
|
||||
#define set_mb(var, value) do { (var) = (value); mb(); } while (0)
|
||||
#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0)
|
||||
|
||||
/*
|
||||
* The group barrier in front of the rsm & ssm are necessary to ensure
|
||||
|
@ -61,8 +61,6 @@ extern void ia64_xchg_called_with_bad_pointer(void);
|
||||
* indicated by comparing RETURN with OLD.
|
||||
*/
|
||||
|
||||
#define __HAVE_ARCH_CMPXCHG 1
|
||||
|
||||
/*
|
||||
* This function doesn't exist, so you'll get a linker error
|
||||
* if something tries to do an invalid cmpxchg().
|
||||
|
@ -107,8 +107,6 @@ __xchg_local(unsigned long x, volatile void *ptr, int size)
|
||||
((__typeof__(*(ptr)))__xchg_local((unsigned long)(x), (ptr), \
|
||||
sizeof(*(ptr))))
|
||||
|
||||
#define __HAVE_ARCH_CMPXCHG 1
|
||||
|
||||
static inline unsigned long
|
||||
__cmpxchg_u32(volatile unsigned int *p, unsigned int old, unsigned int new)
|
||||
{
|
||||
|
@ -90,7 +90,6 @@ extern unsigned long __invalid_cmpxchg_size(volatile void *,
|
||||
* indicated by comparing RETURN with OLD.
|
||||
*/
|
||||
#ifdef CONFIG_RMW_INSNS
|
||||
#define __HAVE_ARCH_CMPXCHG 1
|
||||
|
||||
static inline unsigned long __cmpxchg(volatile void *p, unsigned long old,
|
||||
unsigned long new, int size)
|
||||
|
@ -84,7 +84,7 @@ static inline void fence(void)
|
||||
#define read_barrier_depends() do { } while (0)
|
||||
#define smp_read_barrier_depends() do { } while (0)
|
||||
|
||||
#define set_mb(var, value) do { var = value; smp_mb(); } while (0)
|
||||
#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0)
|
||||
|
||||
#define smp_store_release(p, v) \
|
||||
do { \
|
||||
|
@ -51,8 +51,6 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
|
||||
return old;
|
||||
}
|
||||
|
||||
#define __HAVE_ARCH_CMPXCHG 1
|
||||
|
||||
#define cmpxchg(ptr, o, n) \
|
||||
({ \
|
||||
__typeof__(*(ptr)) _o_ = (o); \
|
||||
|
@ -112,8 +112,8 @@
|
||||
#define __WEAK_LLSC_MB " \n"
|
||||
#endif
|
||||
|
||||
#define set_mb(var, value) \
|
||||
do { var = value; smp_mb(); } while (0)
|
||||
#define smp_store_mb(var, value) \
|
||||
do { WRITE_ONCE(var, value); smp_mb(); } while (0)
|
||||
|
||||
#define smp_llsc_mb() __asm__ __volatile__(__WEAK_LLSC_MB : : :"memory")
|
||||
|
||||
|
@ -138,8 +138,6 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz
|
||||
__xchg((unsigned long)(x), (ptr), sizeof(*(ptr)))); \
|
||||
})
|
||||
|
||||
#define __HAVE_ARCH_CMPXCHG 1
|
||||
|
||||
#define __cmpxchg_asm(ld, st, m, old, new) \
|
||||
({ \
|
||||
__typeof(*(m)) __ret; \
|
||||
|
@ -46,8 +46,6 @@ __xchg(unsigned long x, __volatile__ void *ptr, int size)
|
||||
#define xchg(ptr, x) \
|
||||
((__typeof__(*(ptr)))__xchg((unsigned long)(x), (ptr), sizeof(*(ptr))))
|
||||
|
||||
#define __HAVE_ARCH_CMPXCHG 1
|
||||
|
||||
/* bug catcher for when unsupported size is used - won't link */
|
||||
extern void __cmpxchg_called_with_bad_pointer(void);
|
||||
|
||||
|
@ -34,7 +34,7 @@
|
||||
#define rmb() __asm__ __volatile__ ("sync" : : : "memory")
|
||||
#define wmb() __asm__ __volatile__ ("sync" : : : "memory")
|
||||
|
||||
#define set_mb(var, value) do { var = value; mb(); } while (0)
|
||||
#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0)
|
||||
|
||||
#ifdef __SUBARCH_HAS_LWSYNC
|
||||
# define SMPWMB LWSYNC
|
||||
|
@ -144,7 +144,6 @@ __xchg_local(volatile void *ptr, unsigned long x, unsigned int size)
|
||||
* Compare and exchange - if *p == old, set it to new,
|
||||
* and return the old value of *p.
|
||||
*/
|
||||
#define __HAVE_ARCH_CMPXCHG 1
|
||||
|
||||
static __always_inline unsigned long
|
||||
__cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new)
|
||||
|
@ -36,7 +36,7 @@
|
||||
#define smp_mb__before_atomic() smp_mb()
|
||||
#define smp_mb__after_atomic() smp_mb()
|
||||
|
||||
#define set_mb(var, value) do { var = value; mb(); } while (0)
|
||||
#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0)
|
||||
|
||||
#define smp_store_release(p, v) \
|
||||
do { \
|
||||
|
@ -32,8 +32,6 @@
|
||||
__old; \
|
||||
})
|
||||
|
||||
#define __HAVE_ARCH_CMPXCHG
|
||||
|
||||
#define __cmpxchg_double_op(p1, p2, o1, o2, n1, n2, insn) \
|
||||
({ \
|
||||
register __typeof__(*(p1)) __old1 asm("2") = (o1); \
|
||||
|
@ -42,8 +42,6 @@ static inline unsigned long __cmpxchg(volatile unsigned long *m,
|
||||
(unsigned long)(o), \
|
||||
(unsigned long)(n)))
|
||||
|
||||
#define __HAVE_ARCH_CMPXCHG 1
|
||||
|
||||
#include <asm-generic/cmpxchg-local.h>
|
||||
|
||||
#endif /* _ASM_SCORE_CMPXCHG_H */
|
||||
|
@ -32,7 +32,7 @@
|
||||
#define ctrl_barrier() __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop")
|
||||
#endif
|
||||
|
||||
#define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
|
||||
#define smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0)
|
||||
|
||||
#include <asm-generic/barrier.h>
|
||||
|
||||
|
@ -46,8 +46,6 @@ extern void __xchg_called_with_bad_pointer(void);
|
||||
* if something tries to do an invalid cmpxchg(). */
|
||||
extern void __cmpxchg_called_with_bad_pointer(void);
|
||||
|
||||
#define __HAVE_ARCH_CMPXCHG 1
|
||||
|
||||
static inline unsigned long __cmpxchg(volatile void * ptr, unsigned long old,
|
||||
unsigned long new, int size)
|
||||
{
|
||||
|
@ -40,8 +40,8 @@ do { __asm__ __volatile__("ba,pt %%xcc, 1f\n\t" \
|
||||
#define dma_rmb() rmb()
|
||||
#define dma_wmb() wmb()
|
||||
|
||||
#define set_mb(__var, __value) \
|
||||
do { __var = __value; membar_safe("#StoreLoad"); } while(0)
|
||||
#define smp_store_mb(__var, __value) \
|
||||
do { WRITE_ONCE(__var, __value); membar_safe("#StoreLoad"); } while(0)
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
#define smp_mb() mb()
|
||||
|
@ -34,7 +34,6 @@ static inline unsigned long __xchg(unsigned long x, __volatile__ void * ptr, int
|
||||
*
|
||||
* Cribbed from <asm-parisc/atomic.h>
|
||||
*/
|
||||
#define __HAVE_ARCH_CMPXCHG 1
|
||||
|
||||
/* bug catcher for when unsupported size is used - won't link */
|
||||
void __cmpxchg_called_with_bad_pointer(void);
|
||||
|
@ -65,8 +65,6 @@ static inline unsigned long __xchg(unsigned long x, __volatile__ void * ptr,
|
||||
|
||||
#include <asm-generic/cmpxchg-local.h>
|
||||
|
||||
#define __HAVE_ARCH_CMPXCHG 1
|
||||
|
||||
static inline unsigned long
|
||||
__cmpxchg_u32(volatile int *m, int old, int new)
|
||||
{
|
||||
|
@ -105,9 +105,6 @@ static inline long atomic64_add_unless(atomic64_t *v, long a, long u)
|
||||
|
||||
#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
|
||||
|
||||
/* Define this to indicate that cmpxchg is an efficient operation. */
|
||||
#define __HAVE_ARCH_CMPXCHG
|
||||
|
||||
#endif /* !__ASSEMBLY__ */
|
||||
|
||||
#endif /* _ASM_TILE_ATOMIC_64_H */
|
||||
|
@ -127,7 +127,8 @@ config X86
|
||||
select MODULES_USE_ELF_RELA if X86_64
|
||||
select CLONE_BACKWARDS if X86_32
|
||||
select ARCH_USE_BUILTIN_BSWAP
|
||||
select ARCH_USE_QUEUE_RWLOCK
|
||||
select ARCH_USE_QUEUED_SPINLOCKS
|
||||
select ARCH_USE_QUEUED_RWLOCKS
|
||||
select OLD_SIGSUSPEND3 if X86_32 || IA32_EMULATION
|
||||
select OLD_SIGACTION if X86_32
|
||||
select COMPAT_OLD_SIGACTION if IA32_EMULATION
|
||||
@ -666,7 +667,7 @@ config PARAVIRT_DEBUG
|
||||
config PARAVIRT_SPINLOCKS
|
||||
bool "Paravirtualization layer for spinlocks"
|
||||
depends on PARAVIRT && SMP
|
||||
select UNINLINE_SPIN_UNLOCK
|
||||
select UNINLINE_SPIN_UNLOCK if !QUEUED_SPINLOCKS
|
||||
---help---
|
||||
Paravirtualized spinlocks allow a pvops backend to replace the
|
||||
spinlock implementation with something virtualization-friendly
|
||||
|
@ -35,12 +35,12 @@
|
||||
#define smp_mb() mb()
|
||||
#define smp_rmb() dma_rmb()
|
||||
#define smp_wmb() barrier()
|
||||
#define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
|
||||
#define smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0)
|
||||
#else /* !SMP */
|
||||
#define smp_mb() barrier()
|
||||
#define smp_rmb() barrier()
|
||||
#define smp_wmb() barrier()
|
||||
#define set_mb(var, value) do { var = value; barrier(); } while (0)
|
||||
#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); barrier(); } while (0)
|
||||
#endif /* SMP */
|
||||
|
||||
#define read_barrier_depends() do { } while (0)
|
||||
|
@ -4,8 +4,6 @@
|
||||
#include <linux/compiler.h>
|
||||
#include <asm/alternative.h> /* Provides LOCK_PREFIX */
|
||||
|
||||
#define __HAVE_ARCH_CMPXCHG 1
|
||||
|
||||
/*
|
||||
* Non-existent functions to indicate usage errors at link time
|
||||
* (or compile-time if the compiler implements __compiletime_error()).
|
||||
|
@ -712,6 +712,31 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
|
||||
|
||||
#if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS)
|
||||
|
||||
#ifdef CONFIG_QUEUED_SPINLOCKS
|
||||
|
||||
static __always_inline void pv_queued_spin_lock_slowpath(struct qspinlock *lock,
|
||||
u32 val)
|
||||
{
|
||||
PVOP_VCALL2(pv_lock_ops.queued_spin_lock_slowpath, lock, val);
|
||||
}
|
||||
|
||||
static __always_inline void pv_queued_spin_unlock(struct qspinlock *lock)
|
||||
{
|
||||
PVOP_VCALLEE1(pv_lock_ops.queued_spin_unlock, lock);
|
||||
}
|
||||
|
||||
static __always_inline void pv_wait(u8 *ptr, u8 val)
|
||||
{
|
||||
PVOP_VCALL2(pv_lock_ops.wait, ptr, val);
|
||||
}
|
||||
|
||||
static __always_inline void pv_kick(int cpu)
|
||||
{
|
||||
PVOP_VCALL1(pv_lock_ops.kick, cpu);
|
||||
}
|
||||
|
||||
#else /* !CONFIG_QUEUED_SPINLOCKS */
|
||||
|
||||
static __always_inline void __ticket_lock_spinning(struct arch_spinlock *lock,
|
||||
__ticket_t ticket)
|
||||
{
|
||||
@ -724,7 +749,9 @@ static __always_inline void __ticket_unlock_kick(struct arch_spinlock *lock,
|
||||
PVOP_VCALL2(pv_lock_ops.unlock_kick, lock, ticket);
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif /* CONFIG_QUEUED_SPINLOCKS */
|
||||
|
||||
#endif /* SMP && PARAVIRT_SPINLOCKS */
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
#define PV_SAVE_REGS "pushl %ecx; pushl %edx;"
|
||||
|
@ -334,9 +334,19 @@ struct arch_spinlock;
|
||||
typedef u16 __ticket_t;
|
||||
#endif
|
||||
|
||||
struct qspinlock;
|
||||
|
||||
struct pv_lock_ops {
|
||||
#ifdef CONFIG_QUEUED_SPINLOCKS
|
||||
void (*queued_spin_lock_slowpath)(struct qspinlock *lock, u32 val);
|
||||
struct paravirt_callee_save queued_spin_unlock;
|
||||
|
||||
void (*wait)(u8 *ptr, u8 val);
|
||||
void (*kick)(int cpu);
|
||||
#else /* !CONFIG_QUEUED_SPINLOCKS */
|
||||
struct paravirt_callee_save lock_spinning;
|
||||
void (*unlock_kick)(struct arch_spinlock *lock, __ticket_t ticket);
|
||||
#endif /* !CONFIG_QUEUED_SPINLOCKS */
|
||||
};
|
||||
|
||||
/* This contains all the paravirt structures: we get a convenient
|
||||
|
57
arch/x86/include/asm/qspinlock.h
Normal file
57
arch/x86/include/asm/qspinlock.h
Normal file
@ -0,0 +1,57 @@
|
||||
#ifndef _ASM_X86_QSPINLOCK_H
|
||||
#define _ASM_X86_QSPINLOCK_H
|
||||
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm-generic/qspinlock_types.h>
|
||||
#include <asm/paravirt.h>
|
||||
|
||||
#define queued_spin_unlock queued_spin_unlock
|
||||
/**
|
||||
* native_queued_spin_unlock - release a queued spinlock
|
||||
* @lock : Pointer to queued spinlock structure
|
||||
*
|
||||
* A smp_store_release() on the least-significant byte.
* Only the first byte of *@lock is written (through the u8 cast).
|
||||
*/
|
||||
static inline void native_queued_spin_unlock(struct qspinlock *lock)
|
||||
{
|
||||
smp_store_release((u8 *)lock, 0);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PARAVIRT_SPINLOCKS
|
||||
extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
|
||||
extern void __pv_init_lock_hash(void);
|
||||
extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
|
||||
extern void __raw_callee_save___pv_queued_spin_unlock(struct qspinlock *lock);
|
||||
|
||||
/*
 * CONFIG_PARAVIRT_SPINLOCKS build: the lock slowpath is forwarded,
 * arguments unchanged, to pv_queued_spin_lock_slowpath().
 */
static inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
|
||||
{
|
||||
pv_queued_spin_lock_slowpath(lock, val);
|
||||
}
|
||||
|
||||
/*
 * CONFIG_PARAVIRT_SPINLOCKS build: unlock is forwarded to
 * pv_queued_spin_unlock().
 */
static inline void queued_spin_unlock(struct qspinlock *lock)
|
||||
{
|
||||
pv_queued_spin_unlock(lock);
|
||||
}
|
||||
#else
|
||||
/*
 * Build without CONFIG_PARAVIRT_SPINLOCKS: unlock calls
 * native_queued_spin_unlock() directly.
 */
static inline void queued_spin_unlock(struct qspinlock *lock)
|
||||
{
|
||||
native_queued_spin_unlock(lock);
|
||||
}
|
||||
#endif
|
||||
|
||||
#define virt_queued_spin_lock virt_queued_spin_lock
|
||||
|
||||
static inline bool virt_queued_spin_lock(struct qspinlock *lock)
|
||||
{
|
||||
if (!static_cpu_has(X86_FEATURE_HYPERVISOR))
|
||||
return false;
|
||||
|
||||
while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0)
|
||||
cpu_relax();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
#include <asm-generic/qspinlock.h>
|
||||
|
||||
#endif /* _ASM_X86_QSPINLOCK_H */
|
6
arch/x86/include/asm/qspinlock_paravirt.h
Normal file
6
arch/x86/include/asm/qspinlock_paravirt.h
Normal file
@ -0,0 +1,6 @@
|
||||
#ifndef __ASM_QSPINLOCK_PARAVIRT_H
|
||||
#define __ASM_QSPINLOCK_PARAVIRT_H
|
||||
|
||||
PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock);
|
||||
|
||||
#endif
|
@ -42,6 +42,10 @@
|
||||
extern struct static_key paravirt_ticketlocks_enabled;
|
||||
static __always_inline bool static_key_false(struct static_key *key);
|
||||
|
||||
#ifdef CONFIG_QUEUED_SPINLOCKS
|
||||
#include <asm/qspinlock.h>
|
||||
#else
|
||||
|
||||
#ifdef CONFIG_PARAVIRT_SPINLOCKS
|
||||
|
||||
static inline void __ticket_enter_slowpath(arch_spinlock_t *lock)
|
||||
@ -196,6 +200,7 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
|
||||
cpu_relax();
|
||||
}
|
||||
}
|
||||
#endif /* CONFIG_QUEUED_SPINLOCKS */
|
||||
|
||||
/*
|
||||
* Read-write spinlocks, allowing multiple readers
|
||||
|
@ -23,6 +23,9 @@ typedef u32 __ticketpair_t;
|
||||
|
||||
#define TICKET_SHIFT (sizeof(__ticket_t) * 8)
|
||||
|
||||
#ifdef CONFIG_QUEUED_SPINLOCKS
|
||||
#include <asm-generic/qspinlock_types.h>
|
||||
#else
|
||||
typedef struct arch_spinlock {
|
||||
union {
|
||||
__ticketpair_t head_tail;
|
||||
@ -33,6 +36,7 @@ typedef struct arch_spinlock {
|
||||
} arch_spinlock_t;
|
||||
|
||||
#define __ARCH_SPIN_LOCK_UNLOCKED { { 0 } }
|
||||
#endif /* CONFIG_QUEUED_SPINLOCKS */
|
||||
|
||||
#include <asm-generic/qrwlock_types.h>
|
||||
|
||||
|
@ -584,6 +584,39 @@ static void kvm_kick_cpu(int cpu)
|
||||
kvm_hypercall2(KVM_HC_KICK_CPU, flags, apicid);
|
||||
}
|
||||
|
||||
|
||||
#ifdef CONFIG_QUEUED_SPINLOCKS
|
||||
|
||||
#include <asm/qspinlock.h>
|
||||
|
||||
static void kvm_wait(u8 *ptr, u8 val)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
if (in_nmi())
|
||||
return;
|
||||
|
||||
local_irq_save(flags);
|
||||
|
||||
if (READ_ONCE(*ptr) != val)
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* halt until it's our turn and kicked. Note that we do safe halt
|
||||
* for irq enabled case to avoid hang when lock info is overwritten
|
||||
* in irq spinlock slowpath and no spurious interrupt occurs to save us.
|
||||
*/
|
||||
if (arch_irqs_disabled_flags(flags))
|
||||
halt();
|
||||
else
|
||||
safe_halt();
|
||||
|
||||
out:
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
#else /* !CONFIG_QUEUED_SPINLOCKS */
|
||||
|
||||
enum kvm_contention_stat {
|
||||
TAKEN_SLOW,
|
||||
TAKEN_SLOW_PICKUP,
|
||||
@ -817,6 +850,8 @@ static void kvm_unlock_kick(struct arch_spinlock *lock, __ticket_t ticket)
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* !CONFIG_QUEUED_SPINLOCKS */
|
||||
|
||||
/*
|
||||
* Setup pv_lock_ops to exploit KVM_FEATURE_PV_UNHALT if present.
|
||||
*/
|
||||
@ -828,8 +863,16 @@ void __init kvm_spinlock_init(void)
|
||||
if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
|
||||
return;
|
||||
|
||||
#ifdef CONFIG_QUEUED_SPINLOCKS
|
||||
__pv_init_lock_hash();
|
||||
pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
|
||||
pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock);
|
||||
pv_lock_ops.wait = kvm_wait;
|
||||
pv_lock_ops.kick = kvm_kick_cpu;
|
||||
#else /* !CONFIG_QUEUED_SPINLOCKS */
|
||||
pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(kvm_lock_spinning);
|
||||
pv_lock_ops.unlock_kick = kvm_unlock_kick;
|
||||
#endif
|
||||
}
|
||||
|
||||
static __init int kvm_spinlock_init_jump(void)
|
||||
|
@ -8,11 +8,33 @@
|
||||
|
||||
#include <asm/paravirt.h>
|
||||
|
||||
#ifdef CONFIG_QUEUED_SPINLOCKS
|
||||
__visible void __native_queued_spin_unlock(struct qspinlock *lock)
|
||||
{
|
||||
native_queued_spin_unlock(lock);
|
||||
}
|
||||
|
||||
PV_CALLEE_SAVE_REGS_THUNK(__native_queued_spin_unlock);
|
||||
|
||||
bool pv_is_native_spin_unlock(void)
|
||||
{
|
||||
return pv_lock_ops.queued_spin_unlock.func ==
|
||||
__raw_callee_save___native_queued_spin_unlock;
|
||||
}
|
||||
#endif
|
||||
|
||||
struct pv_lock_ops pv_lock_ops = {
|
||||
#ifdef CONFIG_SMP
|
||||
#ifdef CONFIG_QUEUED_SPINLOCKS
|
||||
.queued_spin_lock_slowpath = native_queued_spin_lock_slowpath,
|
||||
.queued_spin_unlock = PV_CALLEE_SAVE(__native_queued_spin_unlock),
|
||||
.wait = paravirt_nop,
|
||||
.kick = paravirt_nop,
|
||||
#else /* !CONFIG_QUEUED_SPINLOCKS */
|
||||
.lock_spinning = __PV_IS_CALLEE_SAVE(paravirt_nop),
|
||||
.unlock_kick = paravirt_nop,
|
||||
#endif
|
||||
#endif /* !CONFIG_QUEUED_SPINLOCKS */
|
||||
#endif /* SMP */
|
||||
};
|
||||
EXPORT_SYMBOL(pv_lock_ops);
|
||||
|
||||
|
@ -12,6 +12,10 @@ DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax");
|
||||
DEF_NATIVE(pv_cpu_ops, clts, "clts");
|
||||
DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc");
|
||||
|
||||
#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
|
||||
DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%eax)");
|
||||
#endif
|
||||
|
||||
unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len)
|
||||
{
|
||||
/* arg in %eax, return in %eax */
|
||||
@ -24,6 +28,8 @@ unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern bool pv_is_native_spin_unlock(void);
|
||||
|
||||
unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
|
||||
unsigned long addr, unsigned len)
|
||||
{
|
||||
@ -47,14 +53,22 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
|
||||
PATCH_SITE(pv_mmu_ops, write_cr3);
|
||||
PATCH_SITE(pv_cpu_ops, clts);
|
||||
PATCH_SITE(pv_cpu_ops, read_tsc);
|
||||
|
||||
patch_site:
|
||||
ret = paravirt_patch_insns(ibuf, len, start, end);
|
||||
break;
|
||||
#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
|
||||
case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
|
||||
if (pv_is_native_spin_unlock()) {
|
||||
start = start_pv_lock_ops_queued_spin_unlock;
|
||||
end = end_pv_lock_ops_queued_spin_unlock;
|
||||
goto patch_site;
|
||||
}
|
||||
#endif
|
||||
|
||||
default:
|
||||
ret = paravirt_patch_default(type, clobbers, ibuf, addr, len);
|
||||
break;
|
||||
|
||||
patch_site:
|
||||
ret = paravirt_patch_insns(ibuf, len, start, end);
|
||||
break;
|
||||
}
|
||||
#undef PATCH_SITE
|
||||
return ret;
|
||||
|
@ -21,6 +21,10 @@ DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs");
|
||||
DEF_NATIVE(, mov32, "mov %edi, %eax");
|
||||
DEF_NATIVE(, mov64, "mov %rdi, %rax");
|
||||
|
||||
#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
|
||||
DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%rdi)");
|
||||
#endif
|
||||
|
||||
unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len)
|
||||
{
|
||||
return paravirt_patch_insns(insnbuf, len,
|
||||
@ -33,6 +37,8 @@ unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
|
||||
start__mov64, end__mov64);
|
||||
}
|
||||
|
||||
extern bool pv_is_native_spin_unlock(void);
|
||||
|
||||
unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
|
||||
unsigned long addr, unsigned len)
|
||||
{
|
||||
@ -58,14 +64,22 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
|
||||
PATCH_SITE(pv_cpu_ops, clts);
|
||||
PATCH_SITE(pv_mmu_ops, flush_tlb_single);
|
||||
PATCH_SITE(pv_cpu_ops, wbinvd);
|
||||
|
||||
patch_site:
|
||||
ret = paravirt_patch_insns(ibuf, len, start, end);
|
||||
break;
|
||||
#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
|
||||
case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
|
||||
if (pv_is_native_spin_unlock()) {
|
||||
start = start_pv_lock_ops_queued_spin_unlock;
|
||||
end = end_pv_lock_ops_queued_spin_unlock;
|
||||
goto patch_site;
|
||||
}
|
||||
#endif
|
||||
|
||||
default:
|
||||
ret = paravirt_patch_default(type, clobbers, ibuf, addr, len);
|
||||
break;
|
||||
|
||||
patch_site:
|
||||
ret = paravirt_patch_insns(ibuf, len, start, end);
|
||||
break;
|
||||
}
|
||||
#undef PATCH_SITE
|
||||
return ret;
|
||||
|
@ -39,7 +39,8 @@
|
||||
#define smp_mb() barrier()
|
||||
#define smp_rmb() barrier()
|
||||
#define smp_wmb() barrier()
|
||||
#define set_mb(var, value) do { var = value; barrier(); } while (0)
|
||||
|
||||
#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); barrier(); } while (0)
|
||||
|
||||
#define read_barrier_depends() do { } while (0)
|
||||
#define smp_read_barrier_depends() do { } while (0)
|
||||
|
@ -17,6 +17,56 @@
|
||||
#include "xen-ops.h"
|
||||
#include "debugfs.h"
|
||||
|
||||
static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
|
||||
static DEFINE_PER_CPU(char *, irq_name);
|
||||
static bool xen_pvspin = true;
|
||||
|
||||
#ifdef CONFIG_QUEUED_SPINLOCKS
|
||||
|
||||
#include <asm/qspinlock.h>
|
||||
|
||||
static void xen_qlock_kick(int cpu)
|
||||
{
|
||||
xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
|
||||
}
|
||||
|
||||
/*
|
||||
* Halt the current CPU & release it back to the host
|
||||
*/
|
||||
static void xen_qlock_wait(u8 *byte, u8 val)
|
||||
{
|
||||
int irq = __this_cpu_read(lock_kicker_irq);
|
||||
|
||||
/* If kicker interrupts not initialized yet, just spin */
|
||||
if (irq == -1)
|
||||
return;
|
||||
|
||||
/* clear pending */
|
||||
xen_clear_irq_pending(irq);
|
||||
barrier();
|
||||
|
||||
/*
|
||||
* We check the byte value after clearing pending IRQ to make sure
|
||||
* that we won't miss a wakeup event because of the clearing.
|
||||
*
|
||||
* The sync_clear_bit() call in xen_clear_irq_pending() is atomic.
|
||||
* So it is effectively a memory barrier for x86.
|
||||
*/
|
||||
if (READ_ONCE(*byte) != val)
|
||||
return;
|
||||
|
||||
/*
|
||||
* If an interrupt happens here, it will leave the wakeup irq
|
||||
* pending, which will cause xen_poll_irq() to return
|
||||
* immediately.
|
||||
*/
|
||||
|
||||
/* Block until irq becomes pending (or perhaps a spurious wakeup) */
|
||||
xen_poll_irq(irq);
|
||||
}
|
||||
|
||||
#else /* CONFIG_QUEUED_SPINLOCKS */
|
||||
|
||||
enum xen_contention_stat {
|
||||
TAKEN_SLOW,
|
||||
TAKEN_SLOW_PICKUP,
|
||||
@ -100,12 +150,9 @@ struct xen_lock_waiting {
|
||||
__ticket_t want;
|
||||
};
|
||||
|
||||
static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
|
||||
static DEFINE_PER_CPU(char *, irq_name);
|
||||
static DEFINE_PER_CPU(struct xen_lock_waiting, lock_waiting);
|
||||
static cpumask_t waiting_cpus;
|
||||
|
||||
static bool xen_pvspin = true;
|
||||
__visible void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
|
||||
{
|
||||
int irq = __this_cpu_read(lock_kicker_irq);
|
||||
@ -217,6 +264,7 @@ static void xen_unlock_kick(struct arch_spinlock *lock, __ticket_t next)
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif /* CONFIG_QUEUED_SPINLOCKS */
|
||||
|
||||
static irqreturn_t dummy_handler(int irq, void *dev_id)
|
||||
{
|
||||
@ -280,8 +328,16 @@ void __init xen_init_spinlocks(void)
|
||||
return;
|
||||
}
|
||||
printk(KERN_DEBUG "xen: PV spinlocks enabled\n");
|
||||
#ifdef CONFIG_QUEUED_SPINLOCKS
|
||||
__pv_init_lock_hash();
|
||||
pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
|
||||
pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock);
|
||||
pv_lock_ops.wait = xen_qlock_wait;
|
||||
pv_lock_ops.kick = xen_qlock_kick;
|
||||
#else
|
||||
pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(xen_lock_spinning);
|
||||
pv_lock_ops.unlock_kick = xen_unlock_kick;
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
@ -310,7 +366,7 @@ static __init int xen_parse_nopvspin(char *arg)
|
||||
}
|
||||
early_param("xen_nopvspin", xen_parse_nopvspin);
|
||||
|
||||
#ifdef CONFIG_XEN_DEBUG_FS
|
||||
#if defined(CONFIG_XEN_DEBUG_FS) && !defined(CONFIG_QUEUED_SPINLOCKS)
|
||||
|
||||
static struct dentry *d_spin_debug;
|
||||
|
||||
|
@ -189,7 +189,7 @@ static int __pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
|
||||
* doesn't imply write barrier and the users expect write
|
||||
* barrier semantics on wakeup functions. The following
|
||||
* smp_wmb() is equivalent to smp_wmb() in try_to_wake_up()
|
||||
* and is paired with set_mb() in poll_schedule_timeout.
|
||||
* and is paired with smp_store_mb() in poll_schedule_timeout.
|
||||
*/
|
||||
smp_wmb();
|
||||
pwq->triggered = 1;
|
||||
@ -244,7 +244,7 @@ int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
|
||||
/*
|
||||
* Prepare for the next iteration.
|
||||
*
|
||||
* The following set_mb() serves two purposes. First, it's
|
||||
* The following smp_store_mb() serves two purposes. First, it's
|
||||
* the counterpart rmb of the wmb in pollwake() such that data
|
||||
* written before wake up is always visible after wake up.
|
||||
* Second, the full barrier guarantees that triggered clearing
|
||||
@ -252,7 +252,7 @@ int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
|
||||
* this problem doesn't exist for the first iteration as
|
||||
* add_wait_queue() has full barrier semantics.
|
||||
*/
|
||||
set_mb(pwq->triggered, 0);
|
||||
smp_store_mb(pwq->triggered, 0);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
@ -66,8 +66,8 @@
|
||||
#define smp_read_barrier_depends() do { } while (0)
|
||||
#endif
|
||||
|
||||
#ifndef set_mb
|
||||
#define set_mb(var, value) do { (var) = (value); mb(); } while (0)
|
||||
#ifndef smp_store_mb
|
||||
#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0)
|
||||
#endif
|
||||
|
||||
#ifndef smp_mb__before_atomic
|
||||
|
@ -86,9 +86,6 @@ unsigned long __xchg(unsigned long x, volatile void *ptr, int size)
|
||||
|
||||
/*
|
||||
* Atomic compare and exchange.
|
||||
*
|
||||
* Do not define __HAVE_ARCH_CMPXCHG because we want to use it to check whether
|
||||
* a cmpxchg primitive faster than repeated local irq save/restore exists.
|
||||
*/
|
||||
#include <asm-generic/cmpxchg-local.h>
|
||||
|
||||
|
139
include/asm-generic/qspinlock.h
Normal file
139
include/asm-generic/qspinlock.h
Normal file
@ -0,0 +1,139 @@
|
||||
/*
|
||||
* Queued spinlock
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* (C) Copyright 2013-2015 Hewlett-Packard Development Company, L.P.
|
||||
*
|
||||
* Authors: Waiman Long <waiman.long@hp.com>
|
||||
*/
|
||||
#ifndef __ASM_GENERIC_QSPINLOCK_H
|
||||
#define __ASM_GENERIC_QSPINLOCK_H
|
||||
|
||||
#include <asm-generic/qspinlock_types.h>
|
||||
|
||||
/**
|
||||
* queued_spin_is_locked - is the spinlock locked?
|
||||
* @lock: Pointer to queued spinlock structure
|
||||
* Return: the value of the lock word - non-zero if it is locked, 0 otherwise
|
||||
*/
|
||||
static __always_inline int queued_spin_is_locked(struct qspinlock *lock)
|
||||
{
|
||||
return atomic_read(&lock->val);
|
||||
}
|
||||
|
||||
/**
|
||||
* queued_spin_value_unlocked - is the spinlock structure unlocked?
|
||||
* @lock: queued spinlock structure (passed by value, so the caller's copy is tested)
|
||||
* Return: 1 if it is unlocked, 0 otherwise
|
||||
*
|
||||
* N.B. Whenever there are tasks waiting for the lock, it is considered
|
||||
* locked wrt the lockref code to avoid lock stealing by the lockref
|
||||
* code and change things underneath the lock. This also allows some
|
||||
* optimizations to be applied without conflict with lockref.
|
||||
*/
|
||||
static __always_inline int queued_spin_value_unlocked(struct qspinlock lock)
|
||||
{
|
||||
return !atomic_read(&lock.val);
|
||||
}
|
||||
|
||||
/**
|
||||
* queued_spin_is_contended - check if the lock is contended
|
||||
* @lock : Pointer to queued spinlock structure
|
||||
* Return: non-zero (the lock word bits outside _Q_LOCKED_MASK) if lock contended, 0 otherwise
|
||||
*/
|
||||
static __always_inline int queued_spin_is_contended(struct qspinlock *lock)
|
||||
{
|
||||
return atomic_read(&lock->val) & ~_Q_LOCKED_MASK;
|
||||
}
|
||||
/**
|
||||
* queued_spin_trylock - try to acquire the queued spinlock
|
||||
* @lock : Pointer to queued spinlock structure
|
||||
* Return: 1 if lock acquired, 0 if failed
|
||||
*/
|
||||
static __always_inline int queued_spin_trylock(struct qspinlock *lock)
|
||||
{
|
||||
if (!atomic_read(&lock->val) &&
|
||||
(atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) == 0))
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
|
||||
|
||||
/**
|
||||
* queued_spin_lock - acquire a queued spinlock
|
||||
* @lock: Pointer to queued spinlock structure
|
||||
*/
|
||||
static __always_inline void queued_spin_lock(struct qspinlock *lock)
|
||||
{
|
||||
u32 val;
|
||||
|
||||
val = atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL);
|
||||
if (likely(val == 0))
|
||||
return;
|
||||
queued_spin_lock_slowpath(lock, val);
|
||||
}
|
||||
|
||||
#ifndef queued_spin_unlock
|
||||
/**
|
||||
* queued_spin_unlock - release a queued spinlock
|
||||
* @lock : Pointer to queued spinlock structure
|
||||
*/
|
||||
static __always_inline void queued_spin_unlock(struct qspinlock *lock)
|
||||
{
|
||||
/*
|
||||
* smp_mb__before_atomic() in order to guarantee release semantics
|
||||
*/
|
||||
smp_mb__before_atomic_dec();
|
||||
atomic_sub(_Q_LOCKED_VAL, &lock->val);
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* queued_spin_unlock_wait - wait until current lock holder releases the lock
|
||||
* @lock : Pointer to queued spinlock structure
|
||||
*
|
||||
* There is a very slight possibility of live-lock if the lockers keep coming
|
||||
* and the waiter is just unfortunate enough to not see any unlock state.
|
||||
*/
|
||||
static inline void queued_spin_unlock_wait(struct qspinlock *lock)
|
||||
{
|
||||
while (atomic_read(&lock->val) & _Q_LOCKED_MASK)
|
||||
cpu_relax();
|
||||
}
|
||||
|
||||
#ifndef virt_queued_spin_lock
|
||||
/*
 * Default stub, used only when virt_queued_spin_lock is not already
 * defined (see the surrounding #ifndef): always returns false.
 */
static __always_inline bool virt_queued_spin_lock(struct qspinlock *lock)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Initializer
|
||||
*/
|
||||
#define __ARCH_SPIN_LOCK_UNLOCKED { ATOMIC_INIT(0) }
|
||||
|
||||
/*
|
||||
* Remapping spinlock architecture specific functions to the corresponding
|
||||
* queued spinlock functions.
|
||||
*/
|
||||
#define arch_spin_is_locked(l) queued_spin_is_locked(l)
|
||||
#define arch_spin_is_contended(l) queued_spin_is_contended(l)
|
||||
#define arch_spin_value_unlocked(l) queued_spin_value_unlocked(l)
|
||||
#define arch_spin_lock(l) queued_spin_lock(l)
|
||||
#define arch_spin_trylock(l) queued_spin_trylock(l)
|
||||
#define arch_spin_unlock(l) queued_spin_unlock(l)
|
||||
#define arch_spin_lock_flags(l, f) queued_spin_lock(l)
|
||||
#define arch_spin_unlock_wait(l) queued_spin_unlock_wait(l)
|
||||
|
||||
#endif /* __ASM_GENERIC_QSPINLOCK_H */
|
79
include/asm-generic/qspinlock_types.h
Normal file
79
include/asm-generic/qspinlock_types.h
Normal file
@ -0,0 +1,79 @@
|
||||
/*
|
||||
* Queued spinlock
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* (C) Copyright 2013-2015 Hewlett-Packard Development Company, L.P.
|
||||
*
|
||||
* Authors: Waiman Long <waiman.long@hp.com>
|
||||
*/
|
||||
#ifndef __ASM_GENERIC_QSPINLOCK_TYPES_H
|
||||
#define __ASM_GENERIC_QSPINLOCK_TYPES_H
|
||||
|
||||
/*
|
||||
* Including atomic.h with PARAVIRT on will cause compilation errors because
|
||||
* of recursive header file inclusion via paravirt_types.h. So don't include
|
||||
* it if PARAVIRT is on.
|
||||
*/
|
||||
#ifndef CONFIG_PARAVIRT
|
||||
#include <linux/types.h>
|
||||
#include <linux/atomic.h>
|
||||
#endif
|
||||
|
||||
typedef struct qspinlock {
|
||||
atomic_t val; /* the whole lock word; its bitfields are described in the comment that follows */
|
||||
} arch_spinlock_t;
|
||||
|
||||
/*
|
||||
* Bitfields in the atomic value:
|
||||
*
|
||||
* When NR_CPUS < 16K
|
||||
* 0- 7: locked byte
|
||||
* 8: pending
|
||||
* 9-15: not used
|
||||
* 16-17: tail index
|
||||
* 18-31: tail cpu (+1)
|
||||
*
|
||||
* When NR_CPUS >= 16K
|
||||
* 0- 7: locked byte
|
||||
* 8: pending
|
||||
* 9-10: tail index
|
||||
* 11-31: tail cpu (+1)
|
||||
*/
|
||||
#define _Q_SET_MASK(type) (((1U << _Q_ ## type ## _BITS) - 1)\
|
||||
<< _Q_ ## type ## _OFFSET)
|
||||
#define _Q_LOCKED_OFFSET 0
|
||||
#define _Q_LOCKED_BITS 8
|
||||
#define _Q_LOCKED_MASK _Q_SET_MASK(LOCKED)
|
||||
|
||||
#define _Q_PENDING_OFFSET (_Q_LOCKED_OFFSET + _Q_LOCKED_BITS)
|
||||
#if CONFIG_NR_CPUS < (1U << 14)
|
||||
#define _Q_PENDING_BITS 8
|
||||
#else
|
||||
#define _Q_PENDING_BITS 1
|
||||
#endif
|
||||
#define _Q_PENDING_MASK _Q_SET_MASK(PENDING)
|
||||
|
||||
#define _Q_TAIL_IDX_OFFSET (_Q_PENDING_OFFSET + _Q_PENDING_BITS)
|
||||
#define _Q_TAIL_IDX_BITS 2
|
||||
#define _Q_TAIL_IDX_MASK _Q_SET_MASK(TAIL_IDX)
|
||||
|
||||
#define _Q_TAIL_CPU_OFFSET (_Q_TAIL_IDX_OFFSET + _Q_TAIL_IDX_BITS)
|
||||
#define _Q_TAIL_CPU_BITS (32 - _Q_TAIL_CPU_OFFSET)
|
||||
#define _Q_TAIL_CPU_MASK _Q_SET_MASK(TAIL_CPU)
|
||||
|
||||
#define _Q_TAIL_OFFSET _Q_TAIL_IDX_OFFSET
|
||||
#define _Q_TAIL_MASK (_Q_TAIL_IDX_MASK | _Q_TAIL_CPU_MASK)
|
||||
|
||||
#define _Q_LOCKED_VAL (1U << _Q_LOCKED_OFFSET)
|
||||
#define _Q_PENDING_VAL (1U << _Q_PENDING_OFFSET)
|
||||
|
||||
#endif /* __ASM_GENERIC_QSPINLOCK_TYPES_H */
|
@ -250,7 +250,7 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
|
||||
({ union { typeof(x) __val; char __c[1]; } __u; __read_once_size(&(x), __u.__c, sizeof(x)); __u.__val; })
|
||||
|
||||
#define WRITE_ONCE(x, val) \
|
||||
({ typeof(x) __val = (val); __write_once_size(&(x), &__val, sizeof(__val)); __val; })
|
||||
({ union { typeof(x) __val; char __c[1]; } __u = { .__val = (val) }; __write_once_size(&(x), __u.__c, sizeof(x)); __u.__val; })
|
||||
|
||||
#endif /* __KERNEL__ */
|
||||
|
||||
@ -450,7 +450,7 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
|
||||
* with an explicit memory barrier or atomic instruction that provides the
|
||||
* required ordering.
|
||||
*
|
||||
* If possible use READ_ONCE/ASSIGN_ONCE instead.
|
||||
* If possible use READ_ONCE()/WRITE_ONCE() instead.
|
||||
*/
|
||||
#define __ACCESS_ONCE(x) ({ \
|
||||
__maybe_unused typeof(x) __var = (__force typeof(x)) 0; \
|
||||
|
@ -32,4 +32,9 @@ static inline void osq_lock_init(struct optimistic_spin_queue *lock)
|
||||
extern bool osq_lock(struct optimistic_spin_queue *lock);
|
||||
extern void osq_unlock(struct optimistic_spin_queue *lock);
|
||||
|
||||
static inline bool osq_is_locked(struct optimistic_spin_queue *lock)
|
||||
{
|
||||
return atomic_read(&lock->tail) != OSQ_UNLOCKED_VAL;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -252,7 +252,7 @@ extern char ___assert_task_state[1 - 2*!!(
|
||||
#define set_task_state(tsk, state_value) \
|
||||
do { \
|
||||
(tsk)->task_state_change = _THIS_IP_; \
|
||||
set_mb((tsk)->state, (state_value)); \
|
||||
smp_store_mb((tsk)->state, (state_value)); \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
@ -274,7 +274,7 @@ extern char ___assert_task_state[1 - 2*!!(
|
||||
#define set_current_state(state_value) \
|
||||
do { \
|
||||
current->task_state_change = _THIS_IP_; \
|
||||
set_mb(current->state, (state_value)); \
|
||||
smp_store_mb(current->state, (state_value)); \
|
||||
} while (0)
|
||||
|
||||
#else
|
||||
@ -282,7 +282,7 @@ extern char ___assert_task_state[1 - 2*!!(
|
||||
#define __set_task_state(tsk, state_value) \
|
||||
do { (tsk)->state = (state_value); } while (0)
|
||||
#define set_task_state(tsk, state_value) \
|
||||
set_mb((tsk)->state, (state_value))
|
||||
smp_store_mb((tsk)->state, (state_value))
|
||||
|
||||
/*
|
||||
* set_current_state() includes a barrier so that the write of current->state
|
||||
@ -298,7 +298,7 @@ extern char ___assert_task_state[1 - 2*!!(
|
||||
#define __set_current_state(state_value) \
|
||||
do { current->state = (state_value); } while (0)
|
||||
#define set_current_state(state_value) \
|
||||
set_mb(current->state, (state_value))
|
||||
smp_store_mb(current->state, (state_value))
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -235,9 +235,16 @@ config LOCK_SPIN_ON_OWNER
|
||||
def_bool y
|
||||
depends on MUTEX_SPIN_ON_OWNER || RWSEM_SPIN_ON_OWNER
|
||||
|
||||
config ARCH_USE_QUEUE_RWLOCK
|
||||
config ARCH_USE_QUEUED_SPINLOCKS
|
||||
bool
|
||||
|
||||
config QUEUE_RWLOCK
|
||||
def_bool y if ARCH_USE_QUEUE_RWLOCK
|
||||
config QUEUED_SPINLOCKS
|
||||
def_bool y if ARCH_USE_QUEUED_SPINLOCKS
|
||||
depends on SMP
|
||||
|
||||
config ARCH_USE_QUEUED_RWLOCKS
|
||||
bool
|
||||
|
||||
config QUEUED_RWLOCKS
|
||||
def_bool y if ARCH_USE_QUEUED_RWLOCKS
|
||||
depends on SMP
|
||||
|
@ -2055,7 +2055,7 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
|
||||
{
|
||||
/*
|
||||
* The task state is guaranteed to be set before another task can
|
||||
* wake it. set_current_state() is implemented using set_mb() and
|
||||
* wake it. set_current_state() is implemented using smp_store_mb() and
|
||||
* queue_me() calls spin_unlock() upon completion, both serializing
|
||||
* access to the hash list and forcing another memory barrier.
|
||||
*/
|
||||
|
@ -17,6 +17,7 @@ obj-$(CONFIG_SMP) += spinlock.o
|
||||
obj-$(CONFIG_LOCK_SPIN_ON_OWNER) += osq_lock.o
|
||||
obj-$(CONFIG_SMP) += lglock.o
|
||||
obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
|
||||
obj-$(CONFIG_QUEUED_SPINLOCKS) += qspinlock.o
|
||||
obj-$(CONFIG_RT_MUTEXES) += rtmutex.o
|
||||
obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
|
||||
obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
|
||||
@ -25,5 +26,5 @@ obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
|
||||
obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
|
||||
obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o
|
||||
obj-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o
|
||||
obj-$(CONFIG_QUEUE_RWLOCK) += qrwlock.o
|
||||
obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o
|
||||
obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o
|
||||
|
@ -4066,8 +4066,7 @@ void __init lockdep_info(void)
|
||||
|
||||
#ifdef CONFIG_DEBUG_LOCKDEP
|
||||
if (lockdep_init_error) {
|
||||
printk("WARNING: lockdep init error! lock-%s was acquired"
|
||||
"before lockdep_init\n", lock_init_error);
|
||||
printk("WARNING: lockdep init error: lock '%s' was acquired before lockdep_init().\n", lock_init_error);
|
||||
printk("Call stack leading to lockdep invocation was:\n");
|
||||
print_stack_trace(&lockdep_init_trace, 0);
|
||||
}
|
||||
|
@ -17,6 +17,7 @@
|
||||
struct mcs_spinlock {
|
||||
struct mcs_spinlock *next; /* node queued behind this one, NULL if none yet */
|
||||
int locked; /* 1 if lock acquired */
|
||||
int count; /* nesting count, see qspinlock.c */
|
||||
};
|
||||
|
||||
#ifndef arch_mcs_spin_lock_contended
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Queue read/write lock
|
||||
* Queued read/write locks
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
473
kernel/locking/qspinlock.c
Normal file
473
kernel/locking/qspinlock.c
Normal file
@ -0,0 +1,473 @@
|
||||
/*
|
||||
* Queued spinlock
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* (C) Copyright 2013-2015 Hewlett-Packard Development Company, L.P.
|
||||
* (C) Copyright 2013-2014 Red Hat, Inc.
|
||||
* (C) Copyright 2015 Intel Corp.
|
||||
*
|
||||
* Authors: Waiman Long <waiman.long@hp.com>
|
||||
* Peter Zijlstra <peterz@infradead.org>
|
||||
*/
|
||||
|
||||
#ifndef _GEN_PV_LOCK_SLOWPATH
|
||||
|
||||
#include <linux/smp.h>
|
||||
#include <linux/bug.h>
|
||||
#include <linux/cpumask.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/hardirq.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <asm/byteorder.h>
|
||||
#include <asm/qspinlock.h>
|
||||
|
||||
/*
|
||||
* The basic principle of a queue-based spinlock can best be understood
|
||||
* by studying a classic queue-based spinlock implementation called the
|
||||
* MCS lock. The paper below provides a good description for this kind
|
||||
* of lock.
|
||||
*
|
||||
* http://www.cise.ufl.edu/tr/DOC/REP-1992-71.pdf
|
||||
*
|
||||
* This queued spinlock implementation is based on the MCS lock, however to make
|
||||
* it fit the 4 bytes we assume spinlock_t to be, and preserve its existing
|
||||
* API, we must modify it somehow.
|
||||
*
|
||||
* In particular; where the traditional MCS lock consists of a tail pointer
|
||||
* (8 bytes) and needs the next pointer (another 8 bytes) of its own node to
|
||||
* unlock the next pending (next->locked), we compress both these: {tail,
|
||||
* next->locked} into a single u32 value.
|
||||
*
|
||||
* Since a spinlock disables recursion of its own context and there is a limit
|
||||
* to the contexts that can nest; namely: task, softirq, hardirq, nmi. As there
|
||||
* are at most 4 nesting levels, it can be encoded by a 2-bit number. Now
|
||||
* we can encode the tail by combining the 2-bit nesting level with the cpu
|
||||
* number. With one byte for the lock value and 3 bytes for the tail, only a
|
||||
* 32-bit word is now needed. Even though we only need 1 bit for the lock,
|
||||
* we extend it to a full byte to achieve better performance for architectures
|
||||
* that support atomic byte write.
|
||||
*
|
||||
* We also change the first spinner to spin on the lock bit instead of its
|
||||
* node; whereby avoiding the need to carry a node from lock to unlock, and
|
||||
* preserving existing lock API. This also makes the unlock code simpler and
|
||||
* faster.
|
||||
*
|
||||
* N.B. The current implementation only supports architectures that allow
|
||||
* atomic operations on smaller 8-bit and 16-bit data types.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "mcs_spinlock.h"
|
||||
|
||||
#ifdef CONFIG_PARAVIRT_SPINLOCKS
|
||||
#define MAX_NODES 8
|
||||
#else
|
||||
#define MAX_NODES 4
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Per-CPU queue node structures; we can never have more than 4 nested
|
||||
* contexts: task, softirq, hardirq, nmi.
|
||||
*
|
||||
* Exactly fits one 64-byte cacheline on a 64-bit architecture.
|
||||
*
|
||||
* PV doubles the storage and uses the second cacheline for PV state.
|
||||
*/
|
||||
static DEFINE_PER_CPU_ALIGNED(struct mcs_spinlock, mcs_nodes[MAX_NODES]);
|
||||
|
||||
/*
|
||||
* We must be able to distinguish between no-tail and the tail at 0:0,
|
||||
* therefore increment the cpu number by one.
|
||||
*/
|
||||
|
||||
static inline u32 encode_tail(int cpu, int idx)
|
||||
{
|
||||
u32 tail;
|
||||
|
||||
#ifdef CONFIG_DEBUG_SPINLOCK
|
||||
BUG_ON(idx > 3);
|
||||
#endif
|
||||
tail = (cpu + 1) << _Q_TAIL_CPU_OFFSET;
|
||||
tail |= idx << _Q_TAIL_IDX_OFFSET; /* assume < 4 */
|
||||
|
||||
return tail;
|
||||
}
|
||||
|
||||
/*
 * Map a tail code word back to the per-cpu MCS node it names: extract the
 * node index and the cpu number (stored as cpu + 1) and look the node up.
 */
static inline struct mcs_spinlock *decode_tail(u32 tail)
{
	int idx = (tail & _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET;
	int cpu = (tail >> _Q_TAIL_CPU_OFFSET) - 1;

	return per_cpu_ptr(&mcs_nodes[idx], cpu);
}
|
||||
|
||||
#define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK)
|
||||
|
||||
/*
|
||||
* By using the whole 2nd least significant byte for the pending bit, we
|
||||
* can allow better optimization of the lock acquisition for the pending
|
||||
* bit holder.
|
||||
*
|
||||
* This internal structure is also used by the set_locked function which
|
||||
* is not restricted to _Q_PENDING_BITS == 8.
|
||||
*/
|
||||
struct __qspinlock {
|
||||
union {
|
||||
atomic_t val; /* the full lock word */
|
||||
#ifdef __LITTLE_ENDIAN
|
||||
struct {
|
||||
u8 locked; /* lowest byte of val */
|
||||
|
||||
u8 pending; /* second-lowest byte of val */
|
||||
};
|
||||
struct {
|
||||
u16 locked_pending; /* low 16 bits of val: locked + pending together */
|
||||
u16 tail; /* high 16 bits of val */
|
||||
};
|
||||
#else
|
||||
struct {
|
||||
u16 tail; /* big-endian: high 16 bits of val come first in memory */
|
||||
u16 locked_pending;
|
||||
};
|
||||
struct {
|
||||
u8 reserved[2]; /* overlays the tail halfword */
|
||||
u8 pending;
|
||||
u8 locked;
|
||||
};
|
||||
#endif
|
||||
};
|
||||
};
|
||||
|
||||
#if _Q_PENDING_BITS == 8
|
||||
/**
|
||||
* clear_pending_set_locked - take ownership and clear the pending bit.
|
||||
* @lock: Pointer to queued spinlock structure
|
||||
*
|
||||
* *,1,0 -> *,0,1
|
||||
*
|
||||
* Lock stealing is not allowed if this function is used.
|
||||
*/
|
||||
static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
|
||||
{
|
||||
struct __qspinlock *l = (void *)lock;
|
||||
|
||||
WRITE_ONCE(l->locked_pending, _Q_LOCKED_VAL);
|
||||
}
|
||||
|
||||
/*
|
||||
* xchg_tail - Put in the new queue tail code word & retrieve previous one
|
||||
* @lock : Pointer to queued spinlock structure
|
||||
* @tail : The new queue tail code word
|
||||
* Return: The previous queue tail code word
|
||||
*
|
||||
* xchg(lock, tail)
|
||||
*
|
||||
* p,*,* -> n,*,* ; prev = xchg(lock, node)
|
||||
*/
|
||||
static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
|
||||
{
|
||||
struct __qspinlock *l = (void *)lock;
|
||||
|
||||
return (u32)xchg(&l->tail, tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
|
||||
}
|
||||
|
||||
#else /* _Q_PENDING_BITS == 8 */
|
||||
|
||||
/**
|
||||
* clear_pending_set_locked - take ownership and clear the pending bit.
|
||||
* @lock: Pointer to queued spinlock structure
|
||||
*
|
||||
* *,1,0 -> *,0,1
|
||||
*/
|
||||
static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
|
||||
{
|
||||
atomic_add(-_Q_PENDING_VAL + _Q_LOCKED_VAL, &lock->val);
|
||||
}
|
||||
|
||||
/**
|
||||
* xchg_tail - Put in the new queue tail code word & retrieve previous one
|
||||
* @lock : Pointer to queued spinlock structure
|
||||
* @tail : The new queue tail code word
|
||||
* Return: The previous queue tail code word
|
||||
*
|
||||
* xchg(lock, tail)
|
||||
*
|
||||
* p,*,* -> n,*,* ; prev = xchg(lock, node)
|
||||
*/
|
||||
static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
|
||||
{
|
||||
u32 old, new, val = atomic_read(&lock->val);
|
||||
|
||||
for (;;) {
|
||||
new = (val & _Q_LOCKED_PENDING_MASK) | tail;
|
||||
old = atomic_cmpxchg(&lock->val, val, new);
|
||||
if (old == val)
|
||||
break;
|
||||
|
||||
val = old;
|
||||
}
|
||||
return old;
|
||||
}
|
||||
#endif /* _Q_PENDING_BITS == 8 */
|
||||
|
||||
/**
|
||||
* set_locked - Set the lock bit and own the lock
|
||||
* @lock: Pointer to queued spinlock structure
|
||||
*
|
||||
* *,*,0 -> *,0,1
|
||||
*/
|
||||
static __always_inline void set_locked(struct qspinlock *lock)
|
||||
{
|
||||
struct __qspinlock *l = (void *)lock;
|
||||
|
||||
WRITE_ONCE(l->locked, _Q_LOCKED_VAL);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Generate the native code for queued_spin_lock_slowpath(); provide NOPs for
|
||||
* all the PV callbacks.
|
||||
*/
|
||||
|
||||
/* Empty inline versions of the four PV hooks; each one does nothing. */
static __always_inline void __pv_init_node(struct mcs_spinlock *node) { }
|
||||
static __always_inline void __pv_wait_node(struct mcs_spinlock *node) { }
|
||||
static __always_inline void __pv_kick_node(struct mcs_spinlock *node) { }
|
||||
|
||||
static __always_inline void __pv_wait_head(struct qspinlock *lock,
|
||||
struct mcs_spinlock *node) { }
|
||||
|
||||
#define pv_enabled() false
|
||||
|
||||
#define pv_init_node __pv_init_node
|
||||
#define pv_wait_node __pv_wait_node
|
||||
#define pv_kick_node __pv_kick_node
|
||||
#define pv_wait_head __pv_wait_head
|
||||
|
||||
#ifdef CONFIG_PARAVIRT_SPINLOCKS
|
||||
#define queued_spin_lock_slowpath native_queued_spin_lock_slowpath
|
||||
#endif
|
||||
|
||||
#endif /* _GEN_PV_LOCK_SLOWPATH */
|
||||
|
||||
/**
|
||||
* queued_spin_lock_slowpath - acquire the queued spinlock
|
||||
* @lock: Pointer to queued spinlock structure
|
||||
* @val: Current value of the queued spinlock 32-bit word
|
||||
*
|
||||
* (queue tail, pending bit, lock value)
|
||||
*
|
||||
* fast : slow : unlock
|
||||
* : :
|
||||
* uncontended (0,0,0) -:--> (0,0,1) ------------------------------:--> (*,*,0)
|
||||
* : | ^--------.------. / :
|
||||
* : v \ \ | :
|
||||
* pending : (0,1,1) +--> (0,1,0) \ | :
|
||||
* : | ^--' | | :
|
||||
* : v | | :
|
||||
* uncontended : (n,x,y) +--> (n,0,0) --' | :
|
||||
* queue : | ^--' | :
|
||||
* : v | :
|
||||
* contended : (*,x,y) +--> (*,0,0) ---> (*,0,1) -' :
|
||||
* queue : ^--' :
|
||||
*/
|
||||
void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
|
||||
{
|
||||
struct mcs_spinlock *prev, *next, *node;
|
||||
u32 new, old, tail;
|
||||
int idx;
|
||||
|
||||
BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));
|
||||
|
||||
if (pv_enabled())
|
||||
goto queue; /* when pv_enabled() is true the pending-bit path below is skipped entirely */
|
||||
|
||||
if (virt_queued_spin_lock(lock))
|
||||
return;
|
||||
|
||||
/*
|
||||
* wait for in-progress pending->locked hand-overs
|
||||
*
|
||||
* 0,1,0 -> 0,0,1
|
||||
*/
|
||||
if (val == _Q_PENDING_VAL) {
|
||||
while ((val = atomic_read(&lock->val)) == _Q_PENDING_VAL)
|
||||
cpu_relax();
|
||||
}
|
||||
|
||||
/*
|
||||
* trylock || pending
|
||||
*
|
||||
* 0,0,0 -> 0,0,1 ; trylock
|
||||
* 0,0,1 -> 0,1,1 ; pending
|
||||
*/
|
||||
for (;;) {
|
||||
/*
|
||||
* If we observe any contention; queue.
|
||||
*/
|
||||
if (val & ~_Q_LOCKED_MASK)
|
||||
goto queue;
|
||||
|
||||
new = _Q_LOCKED_VAL;
|
||||
if (val == new)
|
||||
new |= _Q_PENDING_VAL;
|
||||
|
||||
old = atomic_cmpxchg(&lock->val, val, new);
|
||||
if (old == val)
|
||||
break;
|
||||
|
||||
val = old;
|
||||
}
|
||||
|
||||
/*
|
||||
* we won the trylock
|
||||
*/
|
||||
if (new == _Q_LOCKED_VAL)
|
||||
return;
|
||||
|
||||
/*
|
||||
* we're pending, wait for the owner to go away.
|
||||
*
|
||||
* *,1,1 -> *,1,0
|
||||
*
|
||||
* this wait loop must be a load-acquire such that we match the
|
||||
* store-release that clears the locked bit and create lock
|
||||
* sequentiality; this is because not all clear_pending_set_locked()
|
||||
* implementations imply full barriers.
|
||||
*/
|
||||
while ((val = smp_load_acquire(&lock->val.counter)) & _Q_LOCKED_MASK)
|
||||
cpu_relax();
|
||||
|
||||
/*
|
||||
* take ownership and clear the pending bit.
|
||||
*
|
||||
* *,1,0 -> *,0,1
|
||||
*/
|
||||
clear_pending_set_locked(lock);
|
||||
return;
|
||||
|
||||
/*
|
||||
* End of pending bit optimistic spinning and beginning of MCS
|
||||
* queuing.
|
||||
*/
|
||||
queue:
|
||||
node = this_cpu_ptr(&mcs_nodes[0]);
|
||||
idx = node->count++; /* nesting count kept in node 0; its old value selects this CPU's node slot below */
|
||||
tail = encode_tail(smp_processor_id(), idx);
|
||||
|
||||
node += idx;
|
||||
node->locked = 0;
|
||||
node->next = NULL;
|
||||
pv_init_node(node);
|
||||
|
||||
/*
|
||||
* We touched a (possibly) cold cacheline in the per-cpu queue node;
|
||||
* attempt the trylock once more in the hope someone let go while we
|
||||
* weren't watching.
|
||||
*/
|
||||
if (queued_spin_trylock(lock))
|
||||
goto release;
|
||||
|
||||
/*
|
||||
* We have already touched the queueing cacheline; don't bother with
|
||||
* pending stuff.
|
||||
*
|
||||
* p,*,* -> n,*,*
|
||||
*/
|
||||
old = xchg_tail(lock, tail);
|
||||
|
||||
/*
|
||||
* if there was a previous node; link it and wait until reaching the
|
||||
* head of the waitqueue.
|
||||
*/
|
||||
if (old & _Q_TAIL_MASK) {
|
||||
prev = decode_tail(old);
|
||||
WRITE_ONCE(prev->next, node);
|
||||
|
||||
pv_wait_node(node);
|
||||
arch_mcs_spin_lock_contended(&node->locked);
|
||||
}
|
||||
|
||||
/*
|
||||
* we're at the head of the waitqueue, wait for the owner & pending to
|
||||
* go away.
|
||||
*
|
||||
* *,x,y -> *,0,0
|
||||
*
|
||||
* this wait loop must use a load-acquire such that we match the
|
||||
* store-release that clears the locked bit and create lock
|
||||
* sequentiality; this is because the set_locked() function below
|
||||
* does not imply a full barrier.
|
||||
*
|
||||
*/
|
||||
pv_wait_head(lock, node);
|
||||
while ((val = smp_load_acquire(&lock->val.counter)) & _Q_LOCKED_PENDING_MASK)
|
||||
cpu_relax();
|
||||
|
||||
/*
|
||||
* claim the lock:
|
||||
*
|
||||
* n,0,0 -> 0,0,1 : lock, uncontended
|
||||
* *,0,0 -> *,0,1 : lock, contended
|
||||
*
|
||||
* If the queue head is the only one in the queue (lock value == tail),
|
||||
* clear the tail code and grab the lock. Otherwise, we only need
|
||||
* to grab the lock.
|
||||
*/
|
||||
for (;;) {
|
||||
if (val != tail) {
|
||||
set_locked(lock);
|
||||
break;
|
||||
}
|
||||
old = atomic_cmpxchg(&lock->val, val, _Q_LOCKED_VAL);
|
||||
if (old == val)
|
||||
goto release; /* No contention */
|
||||
|
||||
val = old;
|
||||
}
|
||||
|
||||
/*
|
||||
* contended path; wait for next, release.
|
||||
*/
|
||||
while (!(next = READ_ONCE(node->next)))
|
||||
cpu_relax();
|
||||
|
||||
arch_mcs_spin_unlock_contended(&next->locked);
|
||||
pv_kick_node(next);
|
||||
|
||||
release:
|
||||
/*
|
||||
* release the node
|
||||
*/
|
||||
this_cpu_dec(mcs_nodes[0].count); /* undoes the node->count++ done at the queue: label */
|
||||
}
|
||||
EXPORT_SYMBOL(queued_spin_lock_slowpath);
|
||||
|
||||
/*
|
||||
* Generate the paravirt code for queued_spin_lock_slowpath().
|
||||
*/
|
||||
#if !defined(_GEN_PV_LOCK_SLOWPATH) && defined(CONFIG_PARAVIRT_SPINLOCKS)
|
||||
#define _GEN_PV_LOCK_SLOWPATH
|
||||
|
||||
#undef pv_enabled
|
||||
#define pv_enabled() true
|
||||
|
||||
#undef pv_init_node
|
||||
#undef pv_wait_node
|
||||
#undef pv_kick_node
|
||||
#undef pv_wait_head
|
||||
|
||||
#undef queued_spin_lock_slowpath
|
||||
#define queued_spin_lock_slowpath __pv_queued_spin_lock_slowpath
|
||||
|
||||
#include "qspinlock_paravirt.h"
|
||||
#include "qspinlock.c"
|
||||
|
||||
#endif
|
325
kernel/locking/qspinlock_paravirt.h
Normal file
325
kernel/locking/qspinlock_paravirt.h
Normal file
@ -0,0 +1,325 @@
|
||||
#ifndef _GEN_PV_LOCK_SLOWPATH
|
||||
#error "do not include this file"
|
||||
#endif
|
||||
|
||||
#include <linux/hash.h>
|
||||
#include <linux/bootmem.h>
|
||||
|
||||
/*
|
||||
* Implement paravirt qspinlocks; the general idea is to halt the vcpus instead
|
||||
* of spinning them.
|
||||
*
|
||||
* This relies on the architecture to provide two paravirt hypercalls:
|
||||
*
|
||||
* pv_wait(u8 *ptr, u8 val) -- suspends the vcpu if *ptr == val
|
||||
* pv_kick(cpu) -- wakes a suspended vcpu
|
||||
*
|
||||
* Using these we implement __pv_queued_spin_lock_slowpath() and
|
||||
* __pv_queued_spin_unlock() to replace native_queued_spin_lock_slowpath() and
|
||||
* native_queued_spin_unlock().
|
||||
*/
|
||||
|
||||
#define _Q_SLOW_VAL (3U << _Q_LOCKED_OFFSET)
|
||||
|
||||
enum vcpu_state {
|
||||
vcpu_running = 0, /* value stored by pv_init_node(), pv_kick_node() and after a wait in pv_wait_node() */
|
||||
vcpu_halted, /* stored by pv_wait_node() just before it may call pv_wait() */
|
||||
};
|
||||
|
||||
struct pv_node {
|
||||
struct mcs_spinlock mcs; /* first member: the pv_* helpers cast a struct mcs_spinlock * to a struct pv_node * */
|
||||
struct mcs_spinlock __res[3]; /* reserved; presumably covers the other three native node slots -- TODO confirm */
|
||||
|
||||
int cpu; /* smp_processor_id() recorded by pv_init_node(); passed to pv_kick() */
|
||||
u8 state; /* holds an enum vcpu_state value */
|
||||
};
|
||||
|
||||
/*
|
||||
* Lock and MCS node addresses hash table for fast lookup
|
||||
*
|
||||
* Hashing is done on a per-cacheline basis to minimize the need to access
|
||||
* more than one cacheline.
|
||||
*
|
||||
* Dynamically allocate a hash table big enough to hold at least 4X the
|
||||
* number of possible cpus in the system. Allocation is done on page
|
||||
* granularity. So the minimum number of hash buckets should be at least
|
||||
* 256 (64-bit) or 512 (32-bit) to fully utilize a 4k page.
|
||||
*
|
||||
* Since we should not be holding locks from NMI context (very rare indeed) the
|
||||
* max load factor is 0.75, which is around the point where open addressing
|
||||
* breaks down.
|
||||
*
|
||||
*/
|
||||
struct pv_hash_entry {
|
||||
struct qspinlock *lock; /* key; NULL marks a free slot (claimed by pv_hash(), cleared by pv_unhash()) */
|
||||
struct pv_node *node; /* node stored by pv_hash() for that lock */
|
||||
};
|
||||
|
||||
#define PV_HE_PER_LINE (SMP_CACHE_BYTES / sizeof(struct pv_hash_entry))
|
||||
#define PV_HE_MIN (PAGE_SIZE / sizeof(struct pv_hash_entry))
|
||||
|
||||
static struct pv_hash_entry *pv_lock_hash;
|
||||
static unsigned int pv_lock_hash_bits __read_mostly;
|
||||
|
||||
/*
|
||||
* Allocate memory for the PV qspinlock hash buckets
|
||||
*
|
||||
* This function should be called from the paravirt spinlock initialization
|
||||
* routine.
|
||||
*/
|
||||
void __init __pv_init_lock_hash(void)
|
||||
{
|
||||
int pv_hash_size = ALIGN(4 * num_possible_cpus(), PV_HE_PER_LINE);
|
||||
|
||||
if (pv_hash_size < PV_HE_MIN)
|
||||
pv_hash_size = PV_HE_MIN;
|
||||
|
||||
/*
|
||||
* Allocate space from bootmem which should be page-size aligned
|
||||
* and hence cacheline aligned.
|
||||
*/
|
||||
pv_lock_hash = alloc_large_system_hash("PV qspinlock",
|
||||
sizeof(struct pv_hash_entry),
|
||||
pv_hash_size, 0, HASH_EARLY,
|
||||
&pv_lock_hash_bits, NULL,
|
||||
pv_hash_size, pv_hash_size);
|
||||
}
|
||||
|
||||
/*
 * Visit every slot of pv_lock_hash once: start at @hash rounded down to a
 * multiple of PV_HE_PER_LINE and advance one slot at a time, wrapping at
 * the table size (1 << pv_lock_hash_bits).
 *
 * NOTE: @hash is modified (rounded down) and @he / @offset are assigned,
 * so all three arguments must be plain lvalues.
 */
#define for_each_hash_entry(he, offset, hash) \
|
||||
for (hash &= ~(PV_HE_PER_LINE - 1), he = &pv_lock_hash[hash], offset = 0; \
|
||||
offset < (1 << pv_lock_hash_bits); \
|
||||
offset++, he = &pv_lock_hash[(hash + offset) & ((1 << pv_lock_hash_bits) - 1)])
|
||||
|
||||
static struct qspinlock **pv_hash(struct qspinlock *lock, struct pv_node *node)
|
||||
{
|
||||
unsigned long offset, hash = hash_ptr(lock, pv_lock_hash_bits);
|
||||
struct pv_hash_entry *he;
|
||||
|
||||
for_each_hash_entry(he, offset, hash) {
|
||||
if (!cmpxchg(&he->lock, NULL, lock)) {
|
||||
WRITE_ONCE(he->node, node);
|
||||
return &he->lock;
|
||||
}
|
||||
}
|
||||
/*
|
||||
* Hard assume there is a free entry for us.
|
||||
*
|
||||
* This is guaranteed by ensuring every blocked lock only ever consumes
|
||||
* a single entry, and since we only have 4 nesting levels per CPU
|
||||
* and allocated 4*nr_possible_cpus(), this must be so.
|
||||
*
|
||||
* The single entry is guaranteed by having the lock owner unhash
|
||||
* before it releases.
|
||||
*/
|
||||
BUG();
|
||||
}
|
||||
|
||||
static struct pv_node *pv_unhash(struct qspinlock *lock)
|
||||
{
|
||||
unsigned long offset, hash = hash_ptr(lock, pv_lock_hash_bits);
|
||||
struct pv_hash_entry *he;
|
||||
struct pv_node *node;
|
||||
|
||||
for_each_hash_entry(he, offset, hash) {
|
||||
if (READ_ONCE(he->lock) == lock) {
|
||||
node = READ_ONCE(he->node);
|
||||
WRITE_ONCE(he->lock, NULL);
|
||||
return node;
|
||||
}
|
||||
}
|
||||
/*
|
||||
* Hard assume we'll find an entry.
|
||||
*
|
||||
* This guarantees a limited lookup time and is itself guaranteed by
|
||||
* having the lock owner do the unhash -- IFF the unlock sees the
|
||||
* SLOW flag, there MUST be a hash entry.
|
||||
*/
|
||||
BUG();
|
||||
}
|
||||
|
||||
/*
|
||||
* Initialize the PV part of the mcs_spinlock node.
|
||||
*/
|
||||
static void pv_init_node(struct mcs_spinlock *node)
|
||||
{
|
||||
struct pv_node *pn = (struct pv_node *)node;
|
||||
|
||||
BUILD_BUG_ON(sizeof(struct pv_node) > 5*sizeof(struct mcs_spinlock));
|
||||
|
||||
pn->cpu = smp_processor_id();
|
||||
pn->state = vcpu_running;
|
||||
}
|
||||
|
||||
/*
|
||||
* Wait for node->locked to become true, halt the vcpu after a short spin.
|
||||
* pv_kick_node() is used to wake the vcpu again.
|
||||
*/
|
||||
static void pv_wait_node(struct mcs_spinlock *node)
|
||||
{
|
||||
struct pv_node *pn = (struct pv_node *)node;
|
||||
int loop;
|
||||
|
||||
for (;;) {
|
||||
for (loop = SPIN_THRESHOLD; loop; loop--) {
|
||||
if (READ_ONCE(node->locked))
|
||||
return;
|
||||
cpu_relax();
|
||||
}
|
||||
|
||||
/*
|
||||
* Order pn->state vs pn->locked thusly:
|
||||
*
|
||||
* [S] pn->state = vcpu_halted [S] next->locked = 1
|
||||
* MB MB
|
||||
* [L] pn->locked [RmW] pn->state = vcpu_running
|
||||
*
|
||||
* Matches the xchg() from pv_kick_node().
|
||||
*/
|
||||
smp_store_mb(pn->state, vcpu_halted);
|
||||
|
||||
if (!READ_ONCE(node->locked))
|
||||
pv_wait(&pn->state, vcpu_halted);
|
||||
|
||||
/*
|
||||
* Reset the vCPU state to avoid unncessary CPU kicking
|
||||
*/
|
||||
WRITE_ONCE(pn->state, vcpu_running);
|
||||
|
||||
/*
|
||||
* If the locked flag is still not set after wakeup, it is a
|
||||
* spurious wakeup and the vCPU should wait again. However,
|
||||
* there is a pretty high overhead for CPU halting and kicking.
|
||||
* So it is better to spin for a while in the hope that the
|
||||
* MCS lock will be released soon.
|
||||
*/
|
||||
}
|
||||
/*
|
||||
* By now our node->locked should be 1 and our caller will not actually
|
||||
* spin-wait for it. We do however rely on our caller to do a
|
||||
* load-acquire for us.
|
||||
*/
|
||||
}
|
||||
|
||||
/*
|
||||
* Called after setting next->locked = 1, used to wake those stuck in
|
||||
* pv_wait_node().
|
||||
*/
|
||||
static void pv_kick_node(struct mcs_spinlock *node)
|
||||
{
|
||||
struct pv_node *pn = (struct pv_node *)node;
|
||||
|
||||
/*
|
||||
* Note that because node->locked is already set, this actual
|
||||
* mcs_spinlock entry could be re-used already.
|
||||
*
|
||||
* This should be fine however, kicking people for no reason is
|
||||
* harmless.
|
||||
*
|
||||
* See the comment in pv_wait_node().
|
||||
*/
|
||||
if (xchg(&pn->state, vcpu_running) == vcpu_halted)
|
||||
pv_kick(pn->cpu);
|
||||
}
|
||||
|
||||
/*
|
||||
* Wait for l->locked to become clear; halt the vcpu after a short spin.
|
||||
* __pv_queued_spin_unlock() will wake us.
|
||||
*
|
||||
* @lock: the qspinlock being waited on; accessed through struct __qspinlock
|
||||
*        so that its ->locked field can be polled and updated directly.
|
||||
* @node: this waiter's queue node, used as the struct pv_node whose ->state
|
||||
*        is marked vcpu_halted and which gets hashed against @lock.
|
||||
*
|
||||
* Returns once l->locked has been observed as 0; actually taking the lock is
|
||||
* left to the caller.
|
||||
*/
|
||||
static void pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node)
|
||||
{
|
||||
struct pv_node *pn = (struct pv_node *)node;
|
||||
struct __qspinlock *l = (void *)lock;
|
||||
struct qspinlock **lp = NULL;
|
||||
int loop;
|
||||
|
||||
for (;;) {
|
||||
for (loop = SPIN_THRESHOLD; loop; loop--) {
|
||||
if (!READ_ONCE(l->locked))
|
||||
return;
|
||||
cpu_relax();
|
||||
}
|
||||
|
||||
WRITE_ONCE(pn->state, vcpu_halted);
|
||||
if (!lp) { /* ONCE: lp stays set, so the lock is hashed on the first pass only */
|
||||
lp = pv_hash(lock, pn);
|
||||
/*
|
||||
* lp must be set before setting _Q_SLOW_VAL
|
||||
*
|
||||
* [S] lp = lock                   [RmW] l = l->locked = 0
|
||||
*     MB                                MB
|
||||
* [S] l->locked = _Q_SLOW_VAL     [L]   lp
|
||||
*
|
||||
* Matches the cmpxchg() in __pv_queued_spin_unlock().
|
||||
*/
|
||||
if (!cmpxchg(&l->locked, _Q_LOCKED_VAL, _Q_SLOW_VAL)) {
|
||||
/*
|
||||
* The cmpxchg() saw 0: the lock is free and
|
||||
* _Q_SLOW_VAL has never been set. Therefore we
|
||||
* need to unhash before getting the lock.
|
||||
*/
|
||||
WRITE_ONCE(*lp, NULL);
|
||||
return;
|
||||
}
|
||||
}
|
||||
pv_wait(&l->locked, _Q_SLOW_VAL);
|
||||
|
||||
/*
|
||||
* The unlocker should have freed the lock before kicking the
|
||||
* CPU. So if the lock is still not free, it is a spurious
|
||||
* wakeup and so the vCPU should wait again after spinning for
|
||||
* a while.
|
||||
*/
|
||||
}
|
||||
|
||||
/*
|
||||
* Lock is unlocked now; the caller will acquire it without waiting.
|
||||
* As with pv_wait_node() we rely on the caller to do a load-acquire
|
||||
* for us.
|
||||
*/
|
||||
}
|
||||
|
||||
/*
|
||||
* PV version of the unlock function to be used instead of
|
||||
* queued_spin_unlock().
|
||||
*
|
||||
* @lock: the qspinlock to release; accessed through struct __qspinlock so
|
||||
*        that its ->locked field can be updated directly.
|
||||
*
|
||||
* Fast path: a single cmpxchg() of ->locked from _Q_LOCKED_VAL to 0.
|
||||
* Slow path: unhash the waiter's pv_node, release the lock, then kick the
|
||||
* waiter's CPU if its state reads vcpu_halted.
|
||||
*/
|
||||
__visible void __pv_queued_spin_unlock(struct qspinlock *lock)
|
||||
{
|
||||
struct __qspinlock *l = (void *)lock;
|
||||
struct pv_node *node;
|
||||
|
||||
/*
|
||||
* We must not unlock if SLOW, because in that case we must first
|
||||
* unhash. Otherwise it would be possible to have multiple @lock
|
||||
* entries, which would be BAD.
|
||||
*/
|
||||
if (likely(cmpxchg(&l->locked, _Q_LOCKED_VAL, 0) == _Q_LOCKED_VAL))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Since the above failed to release, this must be the SLOW path.
|
||||
* Therefore start by looking up the blocked node and unhashing it.
|
||||
*/
|
||||
node = pv_unhash(lock);
|
||||
|
||||
/*
|
||||
* Now that we have a reference to the (likely) blocked pv_node,
|
||||
* release the lock.
|
||||
*/
|
||||
smp_store_release(&l->locked, 0);
|
||||
|
||||
/*
|
||||
* At this point the memory pointed at by lock can be freed/reused,
|
||||
* however we can still use the pv_node to kick the CPU. Only @node is
|
||||
* dereferenced from here on.
|
||||
*/
|
||||
if (READ_ONCE(node->state) == vcpu_halted)
|
||||
pv_kick(node->cpu);
|
||||
}
|
||||
/*
|
||||
* Include the architecture specific callee-save thunk of the
|
||||
* __pv_queued_spin_unlock(). This thunk is put together with
|
||||
* __pv_queued_spin_unlock() near the top of the file to make sure
|
||||
* that the callee-save thunk and the real unlock function are close
|
||||
* to each other sharing consecutive instruction cachelines.
|
||||
*/
|
||||
#include <asm/qspinlock_paravirt.h>
|
||||
|
@ -70,10 +70,10 @@ static void fixup_rt_mutex_waiters(struct rt_mutex *lock)
|
||||
}
|
||||
|
||||
/*
|
||||
* We can speed up the acquire/release, if the architecture
|
||||
* supports cmpxchg and if there's no debugging state to be set up
|
||||
* We can speed up the acquire/release, if there's no debugging state to be
|
||||
* set up.
|
||||
*/
|
||||
#if defined(__HAVE_ARCH_CMPXCHG) && !defined(CONFIG_DEBUG_RT_MUTEXES)
|
||||
#ifndef CONFIG_DEBUG_RT_MUTEXES
|
||||
# define rt_mutex_cmpxchg(l,c,n) (cmpxchg(&l->owner, c, n) == c)
|
||||
static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
|
||||
{
|
||||
@ -1443,10 +1443,17 @@ EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
|
||||
*
|
||||
* @lock: the rt_mutex to be locked
|
||||
*
|
||||
* This function can only be called in thread context. It's safe to
|
||||
* call it from atomic regions, but not from hard interrupt or soft
|
||||
* interrupt context.
|
||||
*
|
||||
* Returns 1 on success and 0 on contention
|
||||
*/
|
||||
int __sched rt_mutex_trylock(struct rt_mutex *lock)
|
||||
{
|
||||
if (WARN_ON(in_irq() || in_nmi() || in_serving_softirq()))
|
||||
return 0;
|
||||
|
||||
return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rt_mutex_trylock);
|
||||
|
@ -409,11 +409,24 @@ done:
|
||||
return taken;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return true if the rwsem has active spinner
|
||||
*/
|
||||
static inline bool rwsem_has_spinner(struct rw_semaphore *sem)
|
||||
{
|
||||
return osq_is_locked(&sem->osq);
|
||||
}
|
||||
|
||||
#else
|
||||
static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool rwsem_has_spinner(struct rw_semaphore *sem)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
@ -496,7 +509,38 @@ struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
/*
|
||||
* If a spinner is present, it is not necessary to do the wakeup.
|
||||
* Try to do wakeup only if the trylock succeeds to minimize
|
||||
* spinlock contention which may introduce too much delay in the
|
||||
* unlock operation.
|
||||
*
|
||||
* spinning writer up_write/up_read caller
|
||||
* --------------- -----------------------
|
||||
* [S] osq_unlock() [L] osq
|
||||
* MB RMB
|
||||
* [RmW] rwsem_try_write_lock() [RmW] spin_trylock(wait_lock)
|
||||
*
|
||||
* Here, it is important to make sure that there won't be a missed
|
||||
* wakeup while the rwsem is free and the only spinning writer goes
|
||||
* to sleep without taking the rwsem. Even when the spinning writer
|
||||
* is just going to break out of the waiting loop, it will still do
|
||||
* a trylock in rwsem_down_write_failed() before sleeping. IOW, if
|
||||
* rwsem_has_spinner() is true, it will guarantee at least one
|
||||
* trylock attempt on the rwsem later on.
|
||||
*/
|
||||
if (rwsem_has_spinner(sem)) {
|
||||
/*
|
||||
* The smp_rmb() here is to make sure that the spinner
|
||||
* state is consulted before reading the wait_lock.
|
||||
*/
|
||||
smp_rmb();
|
||||
if (!raw_spin_trylock_irqsave(&sem->wait_lock, flags))
|
||||
return sem;
|
||||
goto locked;
|
||||
}
|
||||
raw_spin_lock_irqsave(&sem->wait_lock, flags);
|
||||
locked:
|
||||
|
||||
/* do nothing if list empty */
|
||||
if (!list_empty(&sem->wait_list))
|
||||
|
@ -341,7 +341,7 @@ long wait_woken(wait_queue_t *wait, unsigned mode, long timeout)
|
||||
* condition being true _OR_ WQ_FLAG_WOKEN such that we will not miss
|
||||
* an event.
|
||||
*/
|
||||
set_mb(wait->flags, wait->flags & ~WQ_FLAG_WOKEN); /* B */
|
||||
smp_store_mb(wait->flags, wait->flags & ~WQ_FLAG_WOKEN); /* B */
|
||||
|
||||
return timeout;
|
||||
}
|
||||
@ -354,7 +354,7 @@ int woken_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
|
||||
* doesn't imply write barrier and the users expects write
|
||||
* barrier semantics on wakeup functions. The following
|
||||
* smp_wmb() is equivalent to smp_wmb() in try_to_wake_up()
|
||||
* and is paired with set_mb() in wait_woken().
|
||||
* and is paired with smp_store_mb() in wait_woken().
|
||||
*/
|
||||
smp_wmb(); /* C */
|
||||
wait->flags |= WQ_FLAG_WOKEN;
|
||||
|
Loading…
Reference in New Issue
Block a user