2005-04-17 05:20:36 +07:00
|
|
|
/* rwsem-spinlock.c: R/W semaphores: contention handling functions for
|
|
|
|
* generic spinlock implementation
|
|
|
|
*
|
|
|
|
* Copyright (c) 2001 David Howells (dhowells@redhat.com).
|
|
|
|
* - Derived partially from idea by Andrea Arcangeli <andrea@suse.de>
|
|
|
|
* - Derived also from comments by Linus
|
|
|
|
*/
|
|
|
|
#include <linux/rwsem.h>
|
2017-02-03 01:15:33 +07:00
|
|
|
#include <linux/sched/signal.h>
|
2017-02-09 00:51:35 +07:00
|
|
|
#include <linux/sched/debug.h>
|
2011-11-17 09:29:17 +07:00
|
|
|
#include <linux/export.h>
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2013-05-07 20:45:49 +07:00
|
|
|
/* Kind of access a queued waiter is blocked on. */
enum rwsem_waiter_type {
	RWSEM_WAITING_FOR_WRITE = 0,
	RWSEM_WAITING_FOR_READ = 1
};
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
struct rwsem_waiter {
|
|
|
|
struct list_head list;
|
|
|
|
struct task_struct *task;
|
2013-05-07 20:45:49 +07:00
|
|
|
enum rwsem_waiter_type type;
|
2005-04-17 05:20:36 +07:00
|
|
|
};
|
|
|
|
|
2009-12-15 09:00:21 +07:00
|
|
|
int rwsem_is_locked(struct rw_semaphore *sem)
|
|
|
|
{
|
|
|
|
int ret = 1;
|
|
|
|
unsigned long flags;
|
|
|
|
|
2010-02-24 15:54:54 +07:00
|
|
|
if (raw_spin_trylock_irqsave(&sem->wait_lock, flags)) {
|
2014-07-16 19:54:55 +07:00
|
|
|
ret = (sem->count != 0);
|
2010-02-24 15:54:54 +07:00
|
|
|
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
|
2009-12-15 09:00:21 +07:00
|
|
|
}
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(rwsem_is_locked);
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/*
|
|
|
|
* initialise the semaphore
|
|
|
|
*/
|
2006-07-03 14:24:53 +07:00
|
|
|
void __init_rwsem(struct rw_semaphore *sem, const char *name,
|
|
|
|
struct lock_class_key *key)
|
2005-04-17 05:20:36 +07:00
|
|
|
{
|
2006-07-03 14:24:53 +07:00
|
|
|
#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
|
|
|
/*
|
|
|
|
* Make sure we are not reinitializing a held semaphore:
|
|
|
|
*/
|
|
|
|
debug_check_no_locks_freed((void *)sem, sizeof(*sem));
|
2006-10-11 12:45:14 +07:00
|
|
|
lockdep_init_map(&sem->dep_map, name, key, 0);
|
2006-07-03 14:24:53 +07:00
|
|
|
#endif
|
2014-07-16 19:54:55 +07:00
|
|
|
sem->count = 0;
|
2010-02-24 15:54:54 +07:00
|
|
|
raw_spin_lock_init(&sem->wait_lock);
|
2005-04-17 05:20:36 +07:00
|
|
|
INIT_LIST_HEAD(&sem->wait_list);
|
|
|
|
}
|
2009-12-15 09:00:20 +07:00
|
|
|
EXPORT_SYMBOL(__init_rwsem);
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/*
|
|
|
|
* handle the lock release when processes blocked on it that can now run
|
|
|
|
* - if we come here, then:
|
|
|
|
* - the 'active count' _reached_ zero
|
|
|
|
* - the 'waiting count' is non-zero
|
|
|
|
* - the spinlock must be held by the caller
|
|
|
|
* - woken process blocks are discarded from the list after having task zeroed
|
|
|
|
* - writers are only woken if wakewrite is non-zero
|
|
|
|
*/
|
|
|
|
static inline struct rw_semaphore *
|
|
|
|
__rwsem_do_wake(struct rw_semaphore *sem, int wakewrite)
|
|
|
|
{
|
|
|
|
struct rwsem_waiter *waiter;
|
|
|
|
struct task_struct *tsk;
|
|
|
|
int woken;
|
|
|
|
|
|
|
|
waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
|
|
|
|
|
2013-05-07 20:45:49 +07:00
|
|
|
if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
|
2013-05-07 20:45:58 +07:00
|
|
|
if (wakewrite)
|
|
|
|
/* Wake up a writer. Note that we do not grant it the
|
|
|
|
* lock - it will have to acquire it when it runs. */
|
|
|
|
wake_up_process(waiter->task);
|
2005-04-17 05:20:36 +07:00
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* grant an infinite number of read locks to the front of the queue */
|
|
|
|
woken = 0;
|
2013-05-07 20:45:58 +07:00
|
|
|
do {
|
2005-04-17 05:20:36 +07:00
|
|
|
struct list_head *next = waiter->list.next;
|
|
|
|
|
|
|
|
list_del(&waiter->list);
|
|
|
|
tsk = waiter->task;
|
2015-01-30 16:14:24 +07:00
|
|
|
/*
|
|
|
|
* Make sure we do not wakeup the next reader before
|
|
|
|
* setting the nil condition to grant the next reader;
|
|
|
|
* otherwise we could miss the wakeup on the other
|
|
|
|
* side and end up sleeping again. See the pairing
|
|
|
|
* in rwsem_down_read_failed().
|
|
|
|
*/
|
2005-05-01 22:58:47 +07:00
|
|
|
smp_mb();
|
2005-04-17 05:20:36 +07:00
|
|
|
waiter->task = NULL;
|
|
|
|
wake_up_process(tsk);
|
|
|
|
put_task_struct(tsk);
|
|
|
|
woken++;
|
2013-05-07 20:45:58 +07:00
|
|
|
if (next == &sem->wait_list)
|
2005-04-17 05:20:36 +07:00
|
|
|
break;
|
|
|
|
waiter = list_entry(next, struct rwsem_waiter, list);
|
2013-05-07 20:45:58 +07:00
|
|
|
} while (waiter->type != RWSEM_WAITING_FOR_WRITE);
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2014-07-16 19:54:55 +07:00
|
|
|
sem->count += woken;
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
out:
|
|
|
|
return sem;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* wake a single writer
|
|
|
|
*/
|
|
|
|
static inline struct rw_semaphore *
|
|
|
|
__rwsem_wake_one_writer(struct rw_semaphore *sem)
|
|
|
|
{
|
|
|
|
struct rwsem_waiter *waiter;
|
|
|
|
|
|
|
|
waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
|
2013-02-01 17:59:16 +07:00
|
|
|
wake_up_process(waiter->task);
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
return sem;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* get a read lock on the semaphore
|
|
|
|
*/
|
2008-02-08 19:19:55 +07:00
|
|
|
void __sched __down_read(struct rw_semaphore *sem)
|
2005-04-17 05:20:36 +07:00
|
|
|
{
|
|
|
|
struct rwsem_waiter waiter;
|
2010-04-08 01:52:46 +07:00
|
|
|
unsigned long flags;
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2010-02-24 15:54:54 +07:00
|
|
|
raw_spin_lock_irqsave(&sem->wait_lock, flags);
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2014-07-16 19:54:55 +07:00
|
|
|
if (sem->count >= 0 && list_empty(&sem->wait_list)) {
|
2005-04-17 05:20:36 +07:00
|
|
|
/* granted */
|
2014-07-16 19:54:55 +07:00
|
|
|
sem->count++;
|
2010-02-24 15:54:54 +07:00
|
|
|
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
|
2005-04-17 05:20:36 +07:00
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
sched/core: Remove set_task_state()
This is a nasty interface and setting the state of a foreign task must
not be done. As of the following commit:
be628be0956 ("bcache: Make gc wakeup sane, remove set_task_state()")
... everyone in the kernel calls set_task_state() with current, allowing
the helper to be removed.
However, as the comment indicates, it is still around for those archs
where computing current is more expensive than using a pointer, at least
in theory. An important arch that is affected is arm64, however this has
been addressed now [1] and performance is up to par making no difference
with either calls.
Of all the callers, if any, it's the locking bits that would care most
about this -- ie: we end up passing a tsk pointer to a lot of the lock
slowpath, and setting ->state on that. The following numbers are based
on two tests: a custom ad-hoc microbenchmark that just measures
latencies (for ~65 million calls) between get_task_state() vs
get_current_state().
Secondly for a higher overview, an unlink microbenchmark was used,
which pounds on a single file with open, close,unlink combos with
increasing thread counts (up to 4x ncpus). While the workload is quite
unrealistic, it does contend a lot on the inode mutex or now rwsem.
[1] https://lkml.kernel.org/r/1483468021-8237-1-git-send-email-mark.rutland@arm.com
== 1. x86-64 ==
Avg runtime set_task_state(): 601 msecs
Avg runtime set_current_state(): 552 msecs
vanilla dirty
Hmean unlink1-processes-2 36089.26 ( 0.00%) 38977.33 ( 8.00%)
Hmean unlink1-processes-5 28555.01 ( 0.00%) 29832.55 ( 4.28%)
Hmean unlink1-processes-8 37323.75 ( 0.00%) 44974.57 ( 20.50%)
Hmean unlink1-processes-12 43571.88 ( 0.00%) 44283.01 ( 1.63%)
Hmean unlink1-processes-21 34431.52 ( 0.00%) 38284.45 ( 11.19%)
Hmean unlink1-processes-30 34813.26 ( 0.00%) 37975.17 ( 9.08%)
Hmean unlink1-processes-48 37048.90 ( 0.00%) 39862.78 ( 7.59%)
Hmean unlink1-processes-79 35630.01 ( 0.00%) 36855.30 ( 3.44%)
Hmean unlink1-processes-110 36115.85 ( 0.00%) 39843.91 ( 10.32%)
Hmean unlink1-processes-141 32546.96 ( 0.00%) 35418.52 ( 8.82%)
Hmean unlink1-processes-172 34674.79 ( 0.00%) 36899.21 ( 6.42%)
Hmean unlink1-processes-203 37303.11 ( 0.00%) 36393.04 ( -2.44%)
Hmean unlink1-processes-224 35712.13 ( 0.00%) 36685.96 ( 2.73%)
== 2. ppc64le ==
Avg runtime set_task_state(): 938 msecs
Avg runtime set_current_state: 940 msecs
vanilla dirty
Hmean unlink1-processes-2 19269.19 ( 0.00%) 30704.50 ( 59.35%)
Hmean unlink1-processes-5 20106.15 ( 0.00%) 21804.15 ( 8.45%)
Hmean unlink1-processes-8 17496.97 ( 0.00%) 17243.28 ( -1.45%)
Hmean unlink1-processes-12 14224.15 ( 0.00%) 17240.21 ( 21.20%)
Hmean unlink1-processes-21 14155.66 ( 0.00%) 15681.23 ( 10.78%)
Hmean unlink1-processes-30 14450.70 ( 0.00%) 15995.83 ( 10.69%)
Hmean unlink1-processes-48 16945.57 ( 0.00%) 16370.42 ( -3.39%)
Hmean unlink1-processes-79 15788.39 ( 0.00%) 14639.27 ( -7.28%)
Hmean unlink1-processes-110 14268.48 ( 0.00%) 14377.40 ( 0.76%)
Hmean unlink1-processes-141 14023.65 ( 0.00%) 16271.69 ( 16.03%)
Hmean unlink1-processes-172 13417.62 ( 0.00%) 16067.55 ( 19.75%)
Hmean unlink1-processes-203 15293.08 ( 0.00%) 15440.40 ( 0.96%)
Hmean unlink1-processes-234 13719.32 ( 0.00%) 16190.74 ( 18.01%)
Hmean unlink1-processes-265 16400.97 ( 0.00%) 16115.22 ( -1.74%)
Hmean unlink1-processes-296 14388.60 ( 0.00%) 16216.13 ( 12.70%)
Hmean unlink1-processes-320 15771.85 ( 0.00%) 15905.96 ( 0.85%)
x86-64 (known to be fast for get_current()/this_cpu_read_stable() caching)
and ppc64 (with paca) show similar improvements in the unlink microbenches.
The small delta for ppc64 (2ms), does not represent the gains on the unlink
runs. In the case of x86, there was a decent amount of variation in the
latency runs, but always within a 20 to 50ms increase), ppc was more constant.
Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: dave@stgolabs.net
Cc: mark.rutland@arm.com
Link: http://lkml.kernel.org/r/1483479794-14013-5-git-send-email-dave@stgolabs.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2017-01-04 04:43:14 +07:00
|
|
|
set_current_state(TASK_UNINTERRUPTIBLE);
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/* set up my own style of waitqueue */
|
2017-01-04 04:43:13 +07:00
|
|
|
waiter.task = current;
|
2013-05-07 20:45:49 +07:00
|
|
|
waiter.type = RWSEM_WAITING_FOR_READ;
|
2017-01-04 04:43:13 +07:00
|
|
|
get_task_struct(current);
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
list_add_tail(&waiter.list, &sem->wait_list);
|
|
|
|
|
|
|
|
/* we don't need to touch the semaphore struct anymore */
|
2010-02-24 15:54:54 +07:00
|
|
|
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/* wait to be given the lock */
|
|
|
|
for (;;) {
|
|
|
|
if (!waiter.task)
|
|
|
|
break;
|
|
|
|
schedule();
|
sched/core: Remove set_task_state()
This is a nasty interface and setting the state of a foreign task must
not be done. As of the following commit:
be628be0956 ("bcache: Make gc wakeup sane, remove set_task_state()")
... everyone in the kernel calls set_task_state() with current, allowing
the helper to be removed.
However, as the comment indicates, it is still around for those archs
where computing current is more expensive than using a pointer, at least
in theory. An important arch that is affected is arm64, however this has
been addressed now [1] and performance is up to par making no difference
with either calls.
Of all the callers, if any, it's the locking bits that would care most
about this -- ie: we end up passing a tsk pointer to a lot of the lock
slowpath, and setting ->state on that. The following numbers are based
on two tests: a custom ad-hoc microbenchmark that just measures
latencies (for ~65 million calls) between get_task_state() vs
get_current_state().
Secondly for a higher overview, an unlink microbenchmark was used,
which pounds on a single file with open, close,unlink combos with
increasing thread counts (up to 4x ncpus). While the workload is quite
unrealistic, it does contend a lot on the inode mutex or now rwsem.
[1] https://lkml.kernel.org/r/1483468021-8237-1-git-send-email-mark.rutland@arm.com
== 1. x86-64 ==
Avg runtime set_task_state(): 601 msecs
Avg runtime set_current_state(): 552 msecs
vanilla dirty
Hmean unlink1-processes-2 36089.26 ( 0.00%) 38977.33 ( 8.00%)
Hmean unlink1-processes-5 28555.01 ( 0.00%) 29832.55 ( 4.28%)
Hmean unlink1-processes-8 37323.75 ( 0.00%) 44974.57 ( 20.50%)
Hmean unlink1-processes-12 43571.88 ( 0.00%) 44283.01 ( 1.63%)
Hmean unlink1-processes-21 34431.52 ( 0.00%) 38284.45 ( 11.19%)
Hmean unlink1-processes-30 34813.26 ( 0.00%) 37975.17 ( 9.08%)
Hmean unlink1-processes-48 37048.90 ( 0.00%) 39862.78 ( 7.59%)
Hmean unlink1-processes-79 35630.01 ( 0.00%) 36855.30 ( 3.44%)
Hmean unlink1-processes-110 36115.85 ( 0.00%) 39843.91 ( 10.32%)
Hmean unlink1-processes-141 32546.96 ( 0.00%) 35418.52 ( 8.82%)
Hmean unlink1-processes-172 34674.79 ( 0.00%) 36899.21 ( 6.42%)
Hmean unlink1-processes-203 37303.11 ( 0.00%) 36393.04 ( -2.44%)
Hmean unlink1-processes-224 35712.13 ( 0.00%) 36685.96 ( 2.73%)
== 2. ppc64le ==
Avg runtime set_task_state(): 938 msecs
Avg runtime set_current_state: 940 msecs
vanilla dirty
Hmean unlink1-processes-2 19269.19 ( 0.00%) 30704.50 ( 59.35%)
Hmean unlink1-processes-5 20106.15 ( 0.00%) 21804.15 ( 8.45%)
Hmean unlink1-processes-8 17496.97 ( 0.00%) 17243.28 ( -1.45%)
Hmean unlink1-processes-12 14224.15 ( 0.00%) 17240.21 ( 21.20%)
Hmean unlink1-processes-21 14155.66 ( 0.00%) 15681.23 ( 10.78%)
Hmean unlink1-processes-30 14450.70 ( 0.00%) 15995.83 ( 10.69%)
Hmean unlink1-processes-48 16945.57 ( 0.00%) 16370.42 ( -3.39%)
Hmean unlink1-processes-79 15788.39 ( 0.00%) 14639.27 ( -7.28%)
Hmean unlink1-processes-110 14268.48 ( 0.00%) 14377.40 ( 0.76%)
Hmean unlink1-processes-141 14023.65 ( 0.00%) 16271.69 ( 16.03%)
Hmean unlink1-processes-172 13417.62 ( 0.00%) 16067.55 ( 19.75%)
Hmean unlink1-processes-203 15293.08 ( 0.00%) 15440.40 ( 0.96%)
Hmean unlink1-processes-234 13719.32 ( 0.00%) 16190.74 ( 18.01%)
Hmean unlink1-processes-265 16400.97 ( 0.00%) 16115.22 ( -1.74%)
Hmean unlink1-processes-296 14388.60 ( 0.00%) 16216.13 ( 12.70%)
Hmean unlink1-processes-320 15771.85 ( 0.00%) 15905.96 ( 0.85%)
x86-64 (known to be fast for get_current()/this_cpu_read_stable() caching)
and ppc64 (with paca) show similar improvements in the unlink microbenches.
The small delta for ppc64 (2ms), does not represent the gains on the unlink
runs. In the case of x86, there was a decent amount of variation in the
latency runs, but always within a 20 to 50ms increase), ppc was more constant.
Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: dave@stgolabs.net
Cc: mark.rutland@arm.com
Link: http://lkml.kernel.org/r/1483479794-14013-5-git-send-email-dave@stgolabs.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2017-01-04 04:43:14 +07:00
|
|
|
set_current_state(TASK_UNINTERRUPTIBLE);
|
2005-04-17 05:20:36 +07:00
|
|
|
}
|
|
|
|
|
sched/core: Remove set_task_state()
This is a nasty interface and setting the state of a foreign task must
not be done. As of the following commit:
be628be0956 ("bcache: Make gc wakeup sane, remove set_task_state()")
... everyone in the kernel calls set_task_state() with current, allowing
the helper to be removed.
However, as the comment indicates, it is still around for those archs
where computing current is more expensive than using a pointer, at least
in theory. An important arch that is affected is arm64, however this has
been addressed now [1] and performance is up to par making no difference
with either calls.
Of all the callers, if any, it's the locking bits that would care most
about this -- ie: we end up passing a tsk pointer to a lot of the lock
slowpath, and setting ->state on that. The following numbers are based
on two tests: a custom ad-hoc microbenchmark that just measures
latencies (for ~65 million calls) between get_task_state() vs
get_current_state().
Secondly for a higher overview, an unlink microbenchmark was used,
which pounds on a single file with open, close,unlink combos with
increasing thread counts (up to 4x ncpus). While the workload is quite
unrealistic, it does contend a lot on the inode mutex or now rwsem.
[1] https://lkml.kernel.org/r/1483468021-8237-1-git-send-email-mark.rutland@arm.com
== 1. x86-64 ==
Avg runtime set_task_state(): 601 msecs
Avg runtime set_current_state(): 552 msecs
vanilla dirty
Hmean unlink1-processes-2 36089.26 ( 0.00%) 38977.33 ( 8.00%)
Hmean unlink1-processes-5 28555.01 ( 0.00%) 29832.55 ( 4.28%)
Hmean unlink1-processes-8 37323.75 ( 0.00%) 44974.57 ( 20.50%)
Hmean unlink1-processes-12 43571.88 ( 0.00%) 44283.01 ( 1.63%)
Hmean unlink1-processes-21 34431.52 ( 0.00%) 38284.45 ( 11.19%)
Hmean unlink1-processes-30 34813.26 ( 0.00%) 37975.17 ( 9.08%)
Hmean unlink1-processes-48 37048.90 ( 0.00%) 39862.78 ( 7.59%)
Hmean unlink1-processes-79 35630.01 ( 0.00%) 36855.30 ( 3.44%)
Hmean unlink1-processes-110 36115.85 ( 0.00%) 39843.91 ( 10.32%)
Hmean unlink1-processes-141 32546.96 ( 0.00%) 35418.52 ( 8.82%)
Hmean unlink1-processes-172 34674.79 ( 0.00%) 36899.21 ( 6.42%)
Hmean unlink1-processes-203 37303.11 ( 0.00%) 36393.04 ( -2.44%)
Hmean unlink1-processes-224 35712.13 ( 0.00%) 36685.96 ( 2.73%)
== 2. ppc64le ==
Avg runtime set_task_state(): 938 msecs
Avg runtime set_current_state: 940 msecs
vanilla dirty
Hmean unlink1-processes-2 19269.19 ( 0.00%) 30704.50 ( 59.35%)
Hmean unlink1-processes-5 20106.15 ( 0.00%) 21804.15 ( 8.45%)
Hmean unlink1-processes-8 17496.97 ( 0.00%) 17243.28 ( -1.45%)
Hmean unlink1-processes-12 14224.15 ( 0.00%) 17240.21 ( 21.20%)
Hmean unlink1-processes-21 14155.66 ( 0.00%) 15681.23 ( 10.78%)
Hmean unlink1-processes-30 14450.70 ( 0.00%) 15995.83 ( 10.69%)
Hmean unlink1-processes-48 16945.57 ( 0.00%) 16370.42 ( -3.39%)
Hmean unlink1-processes-79 15788.39 ( 0.00%) 14639.27 ( -7.28%)
Hmean unlink1-processes-110 14268.48 ( 0.00%) 14377.40 ( 0.76%)
Hmean unlink1-processes-141 14023.65 ( 0.00%) 16271.69 ( 16.03%)
Hmean unlink1-processes-172 13417.62 ( 0.00%) 16067.55 ( 19.75%)
Hmean unlink1-processes-203 15293.08 ( 0.00%) 15440.40 ( 0.96%)
Hmean unlink1-processes-234 13719.32 ( 0.00%) 16190.74 ( 18.01%)
Hmean unlink1-processes-265 16400.97 ( 0.00%) 16115.22 ( -1.74%)
Hmean unlink1-processes-296 14388.60 ( 0.00%) 16216.13 ( 12.70%)
Hmean unlink1-processes-320 15771.85 ( 0.00%) 15905.96 ( 0.85%)
x86-64 (known to be fast for get_current()/this_cpu_read_stable() caching)
and ppc64 (with paca) show similar improvements in the unlink microbenches.
The small delta for ppc64 (2ms), does not represent the gains on the unlink
runs. In the case of x86, there was a decent amount of variation in the
latency runs, but always within a 20 to 50ms increase), ppc was more constant.
Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: dave@stgolabs.net
Cc: mark.rutland@arm.com
Link: http://lkml.kernel.org/r/1483479794-14013-5-git-send-email-dave@stgolabs.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2017-01-04 04:43:14 +07:00
|
|
|
__set_current_state(TASK_RUNNING);
|
2005-04-17 05:20:36 +07:00
|
|
|
out:
|
2006-07-03 14:24:29 +07:00
|
|
|
;
|
2005-04-17 05:20:36 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* trylock for reading -- returns 1 if successful, 0 if contention
|
|
|
|
*/
|
2008-02-08 19:19:55 +07:00
|
|
|
int __down_read_trylock(struct rw_semaphore *sem)
|
2005-04-17 05:20:36 +07:00
|
|
|
{
|
|
|
|
unsigned long flags;
|
|
|
|
int ret = 0;
|
|
|
|
|
|
|
|
|
2010-02-24 15:54:54 +07:00
|
|
|
raw_spin_lock_irqsave(&sem->wait_lock, flags);
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2014-07-16 19:54:55 +07:00
|
|
|
if (sem->count >= 0 && list_empty(&sem->wait_list)) {
|
2005-04-17 05:20:36 +07:00
|
|
|
/* granted */
|
2014-07-16 19:54:55 +07:00
|
|
|
sem->count++;
|
2005-04-17 05:20:36 +07:00
|
|
|
ret = 1;
|
|
|
|
}
|
|
|
|
|
2010-02-24 15:54:54 +07:00
|
|
|
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* get a write lock on the semaphore
|
|
|
|
*/
|
2016-04-07 22:12:26 +07:00
|
|
|
int __sched __down_write_common(struct rw_semaphore *sem, int state)
|
2005-04-17 05:20:36 +07:00
|
|
|
{
|
|
|
|
struct rwsem_waiter waiter;
|
2010-04-08 01:52:46 +07:00
|
|
|
unsigned long flags;
|
2016-04-07 22:12:26 +07:00
|
|
|
int ret = 0;
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2010-02-24 15:54:54 +07:00
|
|
|
raw_spin_lock_irqsave(&sem->wait_lock, flags);
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/* set up my own style of waitqueue */
|
2017-01-04 04:43:13 +07:00
|
|
|
waiter.task = current;
|
2013-05-07 20:45:49 +07:00
|
|
|
waiter.type = RWSEM_WAITING_FOR_WRITE;
|
2005-04-17 05:20:36 +07:00
|
|
|
list_add_tail(&waiter.list, &sem->wait_list);
|
|
|
|
|
2013-02-01 17:59:16 +07:00
|
|
|
/* wait for someone to release the lock */
|
2005-04-17 05:20:36 +07:00
|
|
|
for (;;) {
|
2013-02-01 17:59:16 +07:00
|
|
|
/*
|
|
|
|
* That is the key to support write lock stealing: allows the
|
|
|
|
* task already on CPU to get the lock soon rather than put
|
|
|
|
* itself into sleep and waiting for system woke it or someone
|
|
|
|
* else in the head of the wait list up.
|
|
|
|
*/
|
2014-07-16 19:54:55 +07:00
|
|
|
if (sem->count == 0)
|
2005-04-17 05:20:36 +07:00
|
|
|
break;
|
2017-02-25 07:17:53 +07:00
|
|
|
if (signal_pending_state(state, current))
|
|
|
|
goto out_nolock;
|
|
|
|
|
sched/core: Remove set_task_state()
This is a nasty interface and setting the state of a foreign task must
not be done. As of the following commit:
be628be0956 ("bcache: Make gc wakeup sane, remove set_task_state()")
... everyone in the kernel calls set_task_state() with current, allowing
the helper to be removed.
However, as the comment indicates, it is still around for those archs
where computing current is more expensive than using a pointer, at least
in theory. An important arch that is affected is arm64, however this has
been addressed now [1] and performance is up to par making no difference
with either calls.
Of all the callers, if any, it's the locking bits that would care most
about this -- ie: we end up passing a tsk pointer to a lot of the lock
slowpath, and setting ->state on that. The following numbers are based
on two tests: a custom ad-hoc microbenchmark that just measures
latencies (for ~65 million calls) between get_task_state() vs
get_current_state().
Secondly for a higher overview, an unlink microbenchmark was used,
which pounds on a single file with open, close,unlink combos with
increasing thread counts (up to 4x ncpus). While the workload is quite
unrealistic, it does contend a lot on the inode mutex or now rwsem.
[1] https://lkml.kernel.org/r/1483468021-8237-1-git-send-email-mark.rutland@arm.com
== 1. x86-64 ==
Avg runtime set_task_state(): 601 msecs
Avg runtime set_current_state(): 552 msecs
vanilla dirty
Hmean unlink1-processes-2 36089.26 ( 0.00%) 38977.33 ( 8.00%)
Hmean unlink1-processes-5 28555.01 ( 0.00%) 29832.55 ( 4.28%)
Hmean unlink1-processes-8 37323.75 ( 0.00%) 44974.57 ( 20.50%)
Hmean unlink1-processes-12 43571.88 ( 0.00%) 44283.01 ( 1.63%)
Hmean unlink1-processes-21 34431.52 ( 0.00%) 38284.45 ( 11.19%)
Hmean unlink1-processes-30 34813.26 ( 0.00%) 37975.17 ( 9.08%)
Hmean unlink1-processes-48 37048.90 ( 0.00%) 39862.78 ( 7.59%)
Hmean unlink1-processes-79 35630.01 ( 0.00%) 36855.30 ( 3.44%)
Hmean unlink1-processes-110 36115.85 ( 0.00%) 39843.91 ( 10.32%)
Hmean unlink1-processes-141 32546.96 ( 0.00%) 35418.52 ( 8.82%)
Hmean unlink1-processes-172 34674.79 ( 0.00%) 36899.21 ( 6.42%)
Hmean unlink1-processes-203 37303.11 ( 0.00%) 36393.04 ( -2.44%)
Hmean unlink1-processes-224 35712.13 ( 0.00%) 36685.96 ( 2.73%)
== 2. ppc64le ==
Avg runtime set_task_state(): 938 msecs
Avg runtime set_current_state: 940 msecs
vanilla dirty
Hmean unlink1-processes-2 19269.19 ( 0.00%) 30704.50 ( 59.35%)
Hmean unlink1-processes-5 20106.15 ( 0.00%) 21804.15 ( 8.45%)
Hmean unlink1-processes-8 17496.97 ( 0.00%) 17243.28 ( -1.45%)
Hmean unlink1-processes-12 14224.15 ( 0.00%) 17240.21 ( 21.20%)
Hmean unlink1-processes-21 14155.66 ( 0.00%) 15681.23 ( 10.78%)
Hmean unlink1-processes-30 14450.70 ( 0.00%) 15995.83 ( 10.69%)
Hmean unlink1-processes-48 16945.57 ( 0.00%) 16370.42 ( -3.39%)
Hmean unlink1-processes-79 15788.39 ( 0.00%) 14639.27 ( -7.28%)
Hmean unlink1-processes-110 14268.48 ( 0.00%) 14377.40 ( 0.76%)
Hmean unlink1-processes-141 14023.65 ( 0.00%) 16271.69 ( 16.03%)
Hmean unlink1-processes-172 13417.62 ( 0.00%) 16067.55 ( 19.75%)
Hmean unlink1-processes-203 15293.08 ( 0.00%) 15440.40 ( 0.96%)
Hmean unlink1-processes-234 13719.32 ( 0.00%) 16190.74 ( 18.01%)
Hmean unlink1-processes-265 16400.97 ( 0.00%) 16115.22 ( -1.74%)
Hmean unlink1-processes-296 14388.60 ( 0.00%) 16216.13 ( 12.70%)
Hmean unlink1-processes-320 15771.85 ( 0.00%) 15905.96 ( 0.85%)
x86-64 (known to be fast for get_current()/this_cpu_read_stable() caching)
and ppc64 (with paca) show similar improvements in the unlink microbenches.
The small delta for ppc64 (2ms), does not represent the gains on the unlink
runs. In the case of x86, there was a decent amount of variation in the
latency runs, but always within a 20 to 50ms increase), ppc was more constant.
Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: dave@stgolabs.net
Cc: mark.rutland@arm.com
Link: http://lkml.kernel.org/r/1483479794-14013-5-git-send-email-dave@stgolabs.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2017-01-04 04:43:14 +07:00
|
|
|
set_current_state(state);
|
2013-02-01 17:59:16 +07:00
|
|
|
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
|
|
|
|
schedule();
|
|
|
|
raw_spin_lock_irqsave(&sem->wait_lock, flags);
|
2005-04-17 05:20:36 +07:00
|
|
|
}
|
2013-02-01 17:59:16 +07:00
|
|
|
/* got the lock */
|
2014-07-16 19:54:55 +07:00
|
|
|
sem->count = -1;
|
2013-02-01 17:59:16 +07:00
|
|
|
list_del(&waiter.list);
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2013-02-01 17:59:16 +07:00
|
|
|
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
|
2016-04-07 22:12:26 +07:00
|
|
|
|
|
|
|
return ret;
|
2017-02-25 07:17:53 +07:00
|
|
|
|
|
|
|
out_nolock:
|
|
|
|
list_del(&waiter.list);
|
locking/rwsem-spinlock: Fix EINTR branch in __down_write_common()
If a writer could been woken up, the above branch
if (sem->count == 0)
break;
would have moved us to taking the sem. So, it's
not the time to wake a writer now, and only readers
are allowed now. Thus, 0 must be passed to __rwsem_do_wake().
Next, __rwsem_do_wake() wakes readers unconditionally.
But we mustn't do that if the sem is owned by writer
in the moment. Otherwise, writer and reader own the sem
the same time, which leads to memory corruption in
callers.
rwsem-xadd.c does not need that, as:
1) the similar check is made lockless there,
2) in __rwsem_mark_wake::try_reader_grant we test,
that sem is not owned by writer.
Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: <stable@vger.kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Niklas Cassel <niklas.cassel@axis.com>
Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 17fcbd590d0c "locking/rwsem: Fix down_write_killable() for CONFIG_RWSEM_GENERIC_SPINLOCK=y"
Link: http://lkml.kernel.org/r/149762063282.19811.9129615532201147826.stgit@localhost.localdomain
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2017-06-16 20:44:34 +07:00
|
|
|
if (!list_empty(&sem->wait_list) && sem->count >= 0)
|
|
|
|
__rwsem_do_wake(sem, 0);
|
2017-02-25 07:17:53 +07:00
|
|
|
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
|
|
|
|
|
|
|
|
return -EINTR;
|
2016-04-07 22:12:26 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Acquire the semaphore for writing, sleeping in TASK_UNINTERRUPTIBLE.
 * The result of the common helper is discarded (presumably it cannot
 * fail for an uninterruptible sleep -- confirm against
 * signal_pending_state()).
 */
void __sched __down_write(struct rw_semaphore *sem)
{
	(void)__down_write_common(sem, TASK_UNINTERRUPTIBLE);
}
|
|
|
|
|
|
|
|
/*
 * Acquire the semaphore for writing, sleeping in TASK_KILLABLE.
 * Returns whatever __down_write_common() returns: 0 when the lock was
 * taken, -EINTR when the wait was abandoned.
 */
int __sched __down_write_killable(struct rw_semaphore *sem)
{
	int err = __down_write_common(sem, TASK_KILLABLE);

	return err;
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* trylock for writing -- returns 1 if successful, 0 if contention
|
|
|
|
*/
|
2008-02-08 19:19:55 +07:00
|
|
|
int __down_write_trylock(struct rw_semaphore *sem)
|
2005-04-17 05:20:36 +07:00
|
|
|
{
|
|
|
|
unsigned long flags;
|
|
|
|
int ret = 0;
|
|
|
|
|
2010-02-24 15:54:54 +07:00
|
|
|
raw_spin_lock_irqsave(&sem->wait_lock, flags);
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2014-07-16 19:54:55 +07:00
|
|
|
if (sem->count == 0) {
|
2013-02-01 17:59:16 +07:00
|
|
|
/* got the lock */
|
2014-07-16 19:54:55 +07:00
|
|
|
sem->count = -1;
|
2005-04-17 05:20:36 +07:00
|
|
|
ret = 1;
|
|
|
|
}
|
|
|
|
|
2010-02-24 15:54:54 +07:00
|
|
|
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* release a read lock on the semaphore
|
|
|
|
*/
|
2008-02-08 19:19:55 +07:00
|
|
|
void __up_read(struct rw_semaphore *sem)
|
2005-04-17 05:20:36 +07:00
|
|
|
{
|
|
|
|
unsigned long flags;
|
|
|
|
|
2010-02-24 15:54:54 +07:00
|
|
|
raw_spin_lock_irqsave(&sem->wait_lock, flags);
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2014-07-16 19:54:55 +07:00
|
|
|
if (--sem->count == 0 && !list_empty(&sem->wait_list))
|
2005-04-17 05:20:36 +07:00
|
|
|
sem = __rwsem_wake_one_writer(sem);
|
|
|
|
|
2010-02-24 15:54:54 +07:00
|
|
|
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
|
2005-04-17 05:20:36 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* release a write lock on the semaphore
|
|
|
|
*/
|
2008-02-08 19:19:55 +07:00
|
|
|
void __up_write(struct rw_semaphore *sem)
|
2005-04-17 05:20:36 +07:00
|
|
|
{
|
|
|
|
unsigned long flags;
|
|
|
|
|
2010-02-24 15:54:54 +07:00
|
|
|
raw_spin_lock_irqsave(&sem->wait_lock, flags);
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2014-07-16 19:54:55 +07:00
|
|
|
sem->count = 0;
|
2005-04-17 05:20:36 +07:00
|
|
|
if (!list_empty(&sem->wait_list))
|
|
|
|
sem = __rwsem_do_wake(sem, 1);
|
|
|
|
|
2010-02-24 15:54:54 +07:00
|
|
|
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
|
2005-04-17 05:20:36 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* downgrade a write lock into a read lock
|
|
|
|
* - just wake up any readers at the front of the queue
|
|
|
|
*/
|
2008-02-08 19:19:55 +07:00
|
|
|
void __downgrade_write(struct rw_semaphore *sem)
|
2005-04-17 05:20:36 +07:00
|
|
|
{
|
|
|
|
unsigned long flags;
|
|
|
|
|
2010-02-24 15:54:54 +07:00
|
|
|
raw_spin_lock_irqsave(&sem->wait_lock, flags);
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2014-07-16 19:54:55 +07:00
|
|
|
sem->count = 1;
|
2005-04-17 05:20:36 +07:00
|
|
|
if (!list_empty(&sem->wait_list))
|
|
|
|
sem = __rwsem_do_wake(sem, 0);
|
|
|
|
|
2010-02-24 15:54:54 +07:00
|
|
|
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
|
2005-04-17 05:20:36 +07:00
|
|
|
}
|
|
|
|
|