mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-18 09:56:18 +07:00
Merge branch 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull core locking updates from Ingo Molnar: "The main changes are: - mutex, completions and rtmutex micro-optimizations - lock debugging fix - various cleanups in the MCS and the futex code" * 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: locking/rtmutex: Optimize setting task running after being blocked locking/rwsem: Use task->state helpers sched/completion: Add lock-free checking of the blocking case sched/completion: Remove unnecessary ->wait.lock serialization when reading completion state locking/mutex: Explicitly mark task as running after wakeup futex: Fix argument handling in futex_lock_pi() calls doc: Fix misnamed FUTEX_CMP_REQUEUE_PI op constants locking/Documentation: Update code path softirq/preempt: Add missing current->preempt_disable_ip update locking/osq: No need for load/acquire when acquire-polling locking/mcs: Better differentiate between MCS variants locking/mutex: Introduce ww_mutex_set_context_slowpath() locking/mutex: Move MCS related comments to proper location locking/mutex: Checking the stamp is WW only
This commit is contained in:
commit
8308756f45
@ -98,7 +98,7 @@ rt_mutex_start_proxy_lock() and rt_mutex_finish_proxy_lock(), which
|
||||
allow the requeue code to acquire an uncontended rt_mutex on behalf
|
||||
of the waiter and to enqueue the waiter on a contended rt_mutex.
|
||||
Two new system calls provide the kernel<->user interface to
|
||||
requeue_pi: FUTEX_WAIT_REQUEUE_PI and FUTEX_REQUEUE_CMP_PI.
|
||||
requeue_pi: FUTEX_WAIT_REQUEUE_PI and FUTEX_CMP_REQUEUE_PI.
|
||||
|
||||
FUTEX_WAIT_REQUEUE_PI is called by the waiter (pthread_cond_wait()
|
||||
and pthread_cond_timedwait()) to block on the initial futex and wait
|
||||
@ -107,7 +107,7 @@ result of a high-speed collision between futex_wait() and
|
||||
futex_lock_pi(), with some extra logic to check for the additional
|
||||
wake-up scenarios.
|
||||
|
||||
FUTEX_REQUEUE_CMP_PI is called by the waker
|
||||
FUTEX_CMP_REQUEUE_PI is called by the waker
|
||||
(pthread_cond_broadcast() and pthread_cond_signal()) to requeue and
|
||||
possibly wake the waiting tasks. Internally, this system call is
|
||||
still handled by futex_requeue (by passing requeue_pi=1). Before
|
||||
@ -120,12 +120,12 @@ task as a waiter on the underlying rt_mutex. It is possible that
|
||||
the lock can be acquired at this stage as well, if so, the next
|
||||
waiter is woken to finish the acquisition of the lock.
|
||||
|
||||
FUTEX_REQUEUE_PI accepts nr_wake and nr_requeue as arguments, but
|
||||
FUTEX_CMP_REQUEUE_PI accepts nr_wake and nr_requeue as arguments, but
|
||||
their sum is all that really matters. futex_requeue() will wake or
|
||||
requeue up to nr_wake + nr_requeue tasks. It will wake only as many
|
||||
tasks as it can acquire the lock for, which in the majority of cases
|
||||
should be 0 as good programming practice dictates that the caller of
|
||||
either pthread_cond_broadcast() or pthread_cond_signal() acquire the
|
||||
mutex prior to making the call. FUTEX_REQUEUE_PI requires that
|
||||
mutex prior to making the call. FUTEX_CMP_REQUEUE_PI requires that
|
||||
nr_wake=1. nr_requeue should be INT_MAX for broadcast and 0 for
|
||||
signal.
|
||||
|
@ -34,7 +34,7 @@ The validator tracks lock-class usage history into 4n + 1 separate state bits:
|
||||
- 'ever held with STATE enabled'
|
||||
- 'ever held as readlock with STATE enabled'
|
||||
|
||||
Where STATE can be either one of (kernel/lockdep_states.h)
|
||||
Where STATE can be either one of (kernel/locking/lockdep_states.h)
|
||||
- hardirq
|
||||
- softirq
|
||||
- reclaim_fs
|
||||
|
@ -5,8 +5,11 @@
|
||||
* An MCS like lock especially tailored for optimistic spinning for sleeping
|
||||
* lock implementations (mutex, rwsem, etc).
|
||||
*/
|
||||
|
||||
#define OSQ_UNLOCKED_VAL (0)
|
||||
struct optimistic_spin_node {
|
||||
struct optimistic_spin_node *next, *prev;
|
||||
int locked; /* 1 if lock acquired */
|
||||
int cpu; /* encoded CPU # + 1 value */
|
||||
};
|
||||
|
||||
struct optimistic_spin_queue {
|
||||
/*
|
||||
@ -16,6 +19,8 @@ struct optimistic_spin_queue {
|
||||
atomic_t tail;
|
||||
};
|
||||
|
||||
#define OSQ_UNLOCKED_VAL (0)
|
||||
|
||||
/* Init macro and function. */
|
||||
#define OSQ_LOCK_UNLOCKED { ATOMIC_INIT(OSQ_UNLOCKED_VAL) }
|
||||
|
||||
@ -24,4 +29,7 @@ static inline void osq_lock_init(struct optimistic_spin_queue *lock)
|
||||
atomic_set(&lock->tail, OSQ_UNLOCKED_VAL);
|
||||
}
|
||||
|
||||
extern bool osq_lock(struct optimistic_spin_queue *lock);
|
||||
extern void osq_unlock(struct optimistic_spin_queue *lock);
|
||||
|
||||
#endif
|
||||
|
@ -231,6 +231,10 @@ config RWSEM_SPIN_ON_OWNER
|
||||
def_bool y
|
||||
depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW
|
||||
|
||||
config LOCK_SPIN_ON_OWNER
|
||||
def_bool y
|
||||
depends on MUTEX_SPIN_ON_OWNER || RWSEM_SPIN_ON_OWNER
|
||||
|
||||
config ARCH_USE_QUEUE_RWLOCK
|
||||
bool
|
||||
|
||||
|
@ -2258,7 +2258,7 @@ static long futex_wait_restart(struct restart_block *restart)
|
||||
* if there are waiters then it will block, it does PI, etc. (Due to
|
||||
* races the kernel might see a 0 value of the futex too.)
|
||||
*/
|
||||
static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, int detect,
|
||||
static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
|
||||
ktime_t *time, int trylock)
|
||||
{
|
||||
struct hrtimer_sleeper timeout, *to = NULL;
|
||||
@ -2953,11 +2953,11 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
|
||||
case FUTEX_WAKE_OP:
|
||||
return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3);
|
||||
case FUTEX_LOCK_PI:
|
||||
return futex_lock_pi(uaddr, flags, val, timeout, 0);
|
||||
return futex_lock_pi(uaddr, flags, timeout, 0);
|
||||
case FUTEX_UNLOCK_PI:
|
||||
return futex_unlock_pi(uaddr, flags);
|
||||
case FUTEX_TRYLOCK_PI:
|
||||
return futex_lock_pi(uaddr, flags, 0, timeout, 1);
|
||||
return futex_lock_pi(uaddr, flags, NULL, 1);
|
||||
case FUTEX_WAIT_REQUEUE_PI:
|
||||
val3 = FUTEX_BITSET_MATCH_ANY;
|
||||
return futex_wait_requeue_pi(uaddr, flags, val, timeout, val3,
|
||||
|
@ -1,5 +1,5 @@
|
||||
|
||||
obj-y += mutex.o semaphore.o rwsem.o mcs_spinlock.o
|
||||
obj-y += mutex.o semaphore.o rwsem.o
|
||||
|
||||
ifdef CONFIG_FUNCTION_TRACER
|
||||
CFLAGS_REMOVE_lockdep.o = -pg
|
||||
@ -14,6 +14,7 @@ ifeq ($(CONFIG_PROC_FS),y)
|
||||
obj-$(CONFIG_LOCKDEP) += lockdep_proc.o
|
||||
endif
|
||||
obj-$(CONFIG_SMP) += spinlock.o
|
||||
obj-$(CONFIG_LOCK_SPIN_ON_OWNER) += osq_lock.o
|
||||
obj-$(CONFIG_SMP) += lglock.o
|
||||
obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
|
||||
obj-$(CONFIG_RT_MUTEXES) += rtmutex.o
|
||||
|
@ -108,20 +108,4 @@ void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
|
||||
arch_mcs_spin_unlock_contended(&next->locked);
|
||||
}
|
||||
|
||||
/*
|
||||
* Cancellable version of the MCS lock above.
|
||||
*
|
||||
* Intended for adaptive spinning of sleeping locks:
|
||||
* mutex_lock()/rwsem_down_{read,write}() etc.
|
||||
*/
|
||||
|
||||
struct optimistic_spin_node {
|
||||
struct optimistic_spin_node *next, *prev;
|
||||
int locked; /* 1 if lock acquired */
|
||||
int cpu; /* encoded CPU # value */
|
||||
};
|
||||
|
||||
extern bool osq_lock(struct optimistic_spin_queue *lock);
|
||||
extern void osq_unlock(struct optimistic_spin_queue *lock);
|
||||
|
||||
#endif /* __LINUX_MCS_SPINLOCK_H */
|
||||
|
@ -147,7 +147,7 @@ static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww,
|
||||
}
|
||||
|
||||
/*
|
||||
* after acquiring lock with fastpath or when we lost out in contested
|
||||
* After acquiring lock with fastpath or when we lost out in contested
|
||||
* slowpath, set ctx and wake up any waiters so they can recheck.
|
||||
*
|
||||
* This function is never called when CONFIG_DEBUG_LOCK_ALLOC is set,
|
||||
@ -191,19 +191,32 @@ ww_mutex_set_context_fastpath(struct ww_mutex *lock,
|
||||
spin_unlock_mutex(&lock->base.wait_lock, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* After acquiring lock in the slowpath set ctx and wake up any
|
||||
* waiters so they can recheck.
|
||||
*
|
||||
* Callers must hold the mutex wait_lock.
|
||||
*/
|
||||
static __always_inline void
|
||||
ww_mutex_set_context_slowpath(struct ww_mutex *lock,
|
||||
struct ww_acquire_ctx *ctx)
|
||||
{
|
||||
struct mutex_waiter *cur;
|
||||
|
||||
ww_mutex_lock_acquired(lock, ctx);
|
||||
lock->ctx = ctx;
|
||||
|
||||
/*
|
||||
* Give any possible sleeping processes the chance to wake up,
|
||||
* so they can recheck if they have to back off.
|
||||
*/
|
||||
list_for_each_entry(cur, &lock->base.wait_list, list) {
|
||||
debug_mutex_wake_waiter(&lock->base, cur);
|
||||
wake_up_process(cur->task);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
|
||||
/*
|
||||
* In order to avoid a stampede of mutex spinners from acquiring the mutex
|
||||
* more or less simultaneously, the spinners need to acquire a MCS lock
|
||||
* first before spinning on the owner field.
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* Mutex spinning code migrated from kernel/sched/core.c
|
||||
*/
|
||||
|
||||
static inline bool owner_running(struct mutex *lock, struct task_struct *owner)
|
||||
{
|
||||
if (lock->owner != owner)
|
||||
@ -307,6 +320,11 @@ static bool mutex_optimistic_spin(struct mutex *lock,
|
||||
if (!mutex_can_spin_on_owner(lock))
|
||||
goto done;
|
||||
|
||||
/*
|
||||
* In order to avoid a stampede of mutex spinners trying to
|
||||
* acquire the mutex all at once, the spinners need to take a
|
||||
* MCS (queued) lock first before spinning on the owner field.
|
||||
*/
|
||||
if (!osq_lock(&lock->osq))
|
||||
goto done;
|
||||
|
||||
@ -469,7 +487,7 @@ void __sched ww_mutex_unlock(struct ww_mutex *lock)
|
||||
EXPORT_SYMBOL(ww_mutex_unlock);
|
||||
|
||||
static inline int __sched
|
||||
__mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx)
|
||||
__ww_mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx)
|
||||
{
|
||||
struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
|
||||
struct ww_acquire_ctx *hold_ctx = ACCESS_ONCE(ww->ctx);
|
||||
@ -557,7 +575,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
|
||||
}
|
||||
|
||||
if (use_ww_ctx && ww_ctx->acquired > 0) {
|
||||
ret = __mutex_lock_check_stamp(lock, ww_ctx);
|
||||
ret = __ww_mutex_lock_check_stamp(lock, ww_ctx);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
@ -569,6 +587,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
|
||||
schedule_preempt_disabled();
|
||||
spin_lock_mutex(&lock->wait_lock, flags);
|
||||
}
|
||||
__set_task_state(task, TASK_RUNNING);
|
||||
|
||||
mutex_remove_waiter(lock, &waiter, current_thread_info());
|
||||
/* set it to 0 if there are no waiters left: */
|
||||
if (likely(list_empty(&lock->wait_list)))
|
||||
@ -582,23 +602,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
|
||||
|
||||
if (use_ww_ctx) {
|
||||
struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
|
||||
struct mutex_waiter *cur;
|
||||
|
||||
/*
|
||||
* This branch gets optimized out for the common case,
|
||||
* and is only important for ww_mutex_lock.
|
||||
*/
|
||||
ww_mutex_lock_acquired(ww, ww_ctx);
|
||||
ww->ctx = ww_ctx;
|
||||
|
||||
/*
|
||||
* Give any possible sleeping processes the chance to wake up,
|
||||
* so they can recheck if they have to back off.
|
||||
*/
|
||||
list_for_each_entry(cur, &lock->wait_list, list) {
|
||||
debug_mutex_wake_waiter(lock, cur);
|
||||
wake_up_process(cur->task);
|
||||
}
|
||||
ww_mutex_set_context_slowpath(ww, ww_ctx);
|
||||
}
|
||||
|
||||
spin_unlock_mutex(&lock->wait_lock, flags);
|
||||
|
@ -1,8 +1,6 @@
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/sched.h>
|
||||
#include "mcs_spinlock.h"
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
#include <linux/osq_lock.h>
|
||||
|
||||
/*
|
||||
* An MCS like lock especially tailored for optimistic spinning for sleeping
|
||||
@ -111,7 +109,7 @@ bool osq_lock(struct optimistic_spin_queue *lock)
|
||||
* cmpxchg in an attempt to undo our queueing.
|
||||
*/
|
||||
|
||||
while (!smp_load_acquire(&node->locked)) {
|
||||
while (!ACCESS_ONCE(node->locked)) {
|
||||
/*
|
||||
* If we need to reschedule bail... so we can block.
|
||||
*/
|
||||
@ -203,6 +201,3 @@ void osq_unlock(struct optimistic_spin_queue *lock)
|
||||
if (next)
|
||||
ACCESS_ONCE(next->locked) = 1;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -1130,6 +1130,7 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
|
||||
set_current_state(state);
|
||||
}
|
||||
|
||||
__set_current_state(TASK_RUNNING);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -1188,10 +1189,9 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
|
||||
ret = task_blocks_on_rt_mutex(lock, &waiter, current, chwalk);
|
||||
|
||||
if (likely(!ret))
|
||||
/* sleep on the mutex */
|
||||
ret = __rt_mutex_slowlock(lock, state, timeout, &waiter);
|
||||
|
||||
set_current_state(TASK_RUNNING);
|
||||
|
||||
if (unlikely(ret)) {
|
||||
remove_waiter(lock, &waiter);
|
||||
rt_mutex_handle_deadlock(ret, chwalk, &waiter);
|
||||
@ -1626,10 +1626,9 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
|
||||
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
|
||||
/* sleep on the mutex */
|
||||
ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter);
|
||||
|
||||
set_current_state(TASK_RUNNING);
|
||||
|
||||
if (unlikely(ret))
|
||||
remove_waiter(lock, waiter);
|
||||
|
||||
|
@ -154,7 +154,7 @@ void __sched __down_read(struct rw_semaphore *sem)
|
||||
set_task_state(tsk, TASK_UNINTERRUPTIBLE);
|
||||
}
|
||||
|
||||
tsk->state = TASK_RUNNING;
|
||||
__set_task_state(tsk, TASK_RUNNING);
|
||||
out:
|
||||
;
|
||||
}
|
||||
|
@ -242,8 +242,7 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
|
||||
schedule();
|
||||
}
|
||||
|
||||
tsk->state = TASK_RUNNING;
|
||||
|
||||
__set_task_state(tsk, TASK_RUNNING);
|
||||
return sem;
|
||||
}
|
||||
EXPORT_SYMBOL(rwsem_down_read_failed);
|
||||
|
@ -268,6 +268,15 @@ bool try_wait_for_completion(struct completion *x)
|
||||
unsigned long flags;
|
||||
int ret = 1;
|
||||
|
||||
/*
|
||||
* Since x->done will need to be locked only
|
||||
* in the non-blocking case, we check x->done
|
||||
* first without taking the lock so we can
|
||||
* return early in the blocking case.
|
||||
*/
|
||||
if (!ACCESS_ONCE(x->done))
|
||||
return 0;
|
||||
|
||||
spin_lock_irqsave(&x->wait.lock, flags);
|
||||
if (!x->done)
|
||||
ret = 0;
|
||||
@ -288,13 +297,6 @@ EXPORT_SYMBOL(try_wait_for_completion);
|
||||
*/
|
||||
bool completion_done(struct completion *x)
|
||||
{
|
||||
unsigned long flags;
|
||||
int ret = 1;
|
||||
|
||||
spin_lock_irqsave(&x->wait.lock, flags);
|
||||
if (!x->done)
|
||||
ret = 0;
|
||||
spin_unlock_irqrestore(&x->wait.lock, flags);
|
||||
return ret;
|
||||
return !!ACCESS_ONCE(x->done);
|
||||
}
|
||||
EXPORT_SYMBOL(completion_done);
|
||||
|
@ -114,8 +114,12 @@ void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
|
||||
trace_softirqs_off(ip);
|
||||
raw_local_irq_restore(flags);
|
||||
|
||||
if (preempt_count() == cnt)
|
||||
if (preempt_count() == cnt) {
|
||||
#ifdef CONFIG_DEBUG_PREEMPT
|
||||
current->preempt_disable_ip = get_parent_ip(CALLER_ADDR1);
|
||||
#endif
|
||||
trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(__local_bh_disable_ip);
|
||||
#endif /* CONFIG_TRACE_IRQFLAGS */
|
||||
|
Loading…
Reference in New Issue
Block a user