2005-04-17 05:20:36 +07:00
|
|
|
#ifndef _LINUX_WAIT_H
|
|
|
|
#define _LINUX_WAIT_H
|
2013-10-04 15:24:49 +07:00
|
|
|
/*
|
|
|
|
* Linux wait queue related types and methods
|
|
|
|
*/
|
2005-04-17 05:20:36 +07:00
|
|
|
#include <linux/list.h>
|
|
|
|
#include <linux/stddef.h>
|
|
|
|
#include <linux/spinlock.h>
|
|
|
|
#include <asm/current.h>
|
2012-10-13 16:46:48 +07:00
|
|
|
#include <uapi/linux/wait.h>
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/* One waiter on a wait queue; the structure is laid out further down. */
typedef struct __wait_queue wait_queue_t;

/*
 * Type of the wake callback stored in wait_queue_t::func.  It takes the
 * waiter entry together with mode, flags and key arguments and returns int.
 */
typedef int (*wait_queue_func_t)(wait_queue_t *wait, unsigned int mode,
				 int flags, void *key);

/*
 * Callback installed by __WAITQUEUE_INITIALIZER() and
 * init_waitqueue_entry(); its signature matches wait_queue_func_t.
 */
int default_wake_function(wait_queue_t *wait, unsigned int mode, int flags,
			  void *key);
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
struct __wait_queue {
|
2013-10-04 15:24:49 +07:00
|
|
|
unsigned int flags;
|
2005-04-17 05:20:36 +07:00
|
|
|
#define WQ_FLAG_EXCLUSIVE 0x01
|
2013-10-04 15:24:49 +07:00
|
|
|
void *private;
|
|
|
|
wait_queue_func_t func;
|
|
|
|
struct list_head task_list;
|
2005-04-17 05:20:36 +07:00
|
|
|
};
|
|
|
|
|
|
|
|
/* bit_nr value stored by __WAIT_ATOMIC_T_KEY_INITIALIZER(). */
#define WAIT_ATOMIC_T_BIT_NR -1

/*
 * Key describing what a bit waiter is waiting on.
 */
struct wait_bit_key {
	/* address being waited on (the word, or the atomic_t) */
	void		*flags;
	/* bit index, or WAIT_ATOMIC_T_BIT_NR */
	int		bit_nr;
	/* NOTE(review): not set by the initializers in this header */
	unsigned long	private;
};
|
|
|
|
|
|
|
|
struct wait_bit_queue {
|
2013-10-04 15:24:49 +07:00
|
|
|
struct wait_bit_key key;
|
|
|
|
wait_queue_t wait;
|
2005-04-17 05:20:36 +07:00
|
|
|
};
|
|
|
|
|
|
|
|
struct __wait_queue_head {
|
2013-10-04 15:24:49 +07:00
|
|
|
spinlock_t lock;
|
|
|
|
struct list_head task_list;
|
2005-04-17 05:20:36 +07:00
|
|
|
};
|
|
|
|
typedef struct __wait_queue_head wait_queue_head_t;
|
|
|
|
|
2005-11-07 15:59:43 +07:00
|
|
|
/* Forward declaration: this header only handles task_struct by pointer. */
struct task_struct;
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/*
|
|
|
|
 * Macros for declaration and initialisation of the datatypes
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
 * Brace initializer for a wait_queue_t: stores @tsk in ->private, installs
 * default_wake_function() as the callback and leaves the list link as
 * NULL/NULL.  ->flags is not named and is therefore zero-initialised.
 * @name is accepted but not used by the expansion.
 */
#define __WAITQUEUE_INITIALIZER(name, tsk)				\
	{								\
		.private	= tsk,					\
		.func		= default_wake_function,		\
		.task_list	= { NULL, NULL },			\
	}
|
|
|
|
|
|
|
|
/*
 * Define a wait_queue_t variable called @name, initialised through
 * __WAITQUEUE_INITIALIZER() with @tsk as its private pointer.
 */
#define DECLARE_WAITQUEUE(name, tsk) \
	wait_queue_t name = __WAITQUEUE_INITIALIZER(name, tsk)
|
|
|
|
|
|
|
|
/*
 * Brace initializer for a wait_queue_head_t named @name: the lock starts
 * out unlocked and task_list points back at itself, i.e. the queue is empty.
 */
#define __WAIT_QUEUE_HEAD_INITIALIZER(name)				\
	{								\
		.lock		= __SPIN_LOCK_UNLOCKED(name.lock),	\
		.task_list	= {					\
			&(name).task_list,				\
			&(name).task_list				\
		}							\
	}
|
|
|
|
|
|
|
|
/*
 * Define a wait_queue_head_t variable called @name, initialised as an
 * empty queue by __WAIT_QUEUE_HEAD_INITIALIZER().
 */
#define DECLARE_WAIT_QUEUE_HEAD(name) \
	wait_queue_head_t name = __WAIT_QUEUE_HEAD_INITIALIZER(name)
|
|
|
|
|
|
|
|
/*
 * Brace initializer for a struct wait_bit_key that waits on bit @bit of
 * the word at address @word.  Fields not named here are zero-initialised.
 */
#define __WAIT_BIT_KEY_INITIALIZER(word, bit)				\
	{								\
		.flags	= word,						\
		.bit_nr	= bit,						\
	}
|
|
|
|
|
Add wait_on_atomic_t() and wake_up_atomic_t()
Add wait_on_atomic_t() and wake_up_atomic_t() to indicate became-zero events on
atomic_t types. This uses the bit-wake waitqueue table. The key is set to a
value outside of the number of bits in a long so that wait_on_bit() won't be
woken up accidentally.
What I'm using this for is: in a following patch I add a counter to struct
fscache_cookie to count the number of outstanding operations that need access
to netfs data. The way this works is:
(1) When a cookie is allocated, the counter is initialised to 1.
(2) When an operation wants to access netfs data, it calls atomic_inc_unless()
to increment the counter before it does so. If it was 0, then the counter
isn't incremented, the operation isn't permitted to access the netfs data
(which might by this point no longer exist) and the operation aborts in
some appropriate manner.
(3) When an operation finishes with the netfs data, it decrements the counter
and if it reaches 0, calls wake_up_atomic_t() on it - the assumption being
that it was the last blocker.
(4) When a cookie is released, the counter is decremented and the releaser
uses wait_on_atomic_t() to wait for the counter to become 0 - which should
indicate no one is using the netfs data any longer. The netfs data can
then be destroyed.
There are some alternatives that I have thought of and that have been suggested
by Tejun Heo:
(A) Using wait_on_bit() to wait on a bit in the counter. This doesn't work
because if that bit happens to be 0 then the wait won't happen - even if
the counter is non-zero.
(B) Using wait_on_bit() to wait on a flag elsewhere which is cleared when the
counter reaches 0. Such a flag would be redundant and would add
complexity.
(C) Adding a waitqueue to fscache_cookie - this would expand that struct by
several words for an event that happens just once in each cookie's
lifetime. Further, cookies are generally per-file so there are likely to
be a lot of them.
(D) Similar to (C), but add a pointer to a waitqueue in the cookie instead of
a waitqueue. This would add single word per cookie and so would be less
of an expansion - but still an expansion.
(E) Adding a static waitqueue to the fscache module. Generally this would be
fine, but under certain circumstances many cookies will all get added at
the same time (eg. NFS umount, cache withdrawal) thereby presenting
scaling issues. Note that the wait may be significant as disk I/O may be
in progress.
So, I think reusing the wait_on_bit() waitqueue set is reasonable. I don't
make much use of the waitqueue I need on a per-cookie basis, but sometimes I
have a huge flood of the cookies to deal with.
I also don't want to add a whole new set of global waitqueue tables
specifically for the dec-to-0 event if I can reuse the bit tables.
Signed-off-by: David Howells <dhowells@redhat.com>
Tested-By: Milosz Tanski <milosz@adfin.com>
Acked-by: Jeff Layton <jlayton@redhat.com>
2013-05-11 01:50:26 +07:00
|
|
|
/*
 * Brace initializer for a struct wait_bit_key that waits on the atomic_t
 * at address @p; bit_nr is set to WAIT_ATOMIC_T_BIT_NR rather than to a
 * real bit index.
 */
#define __WAIT_ATOMIC_T_KEY_INITIALIZER(p)				\
	{								\
		.flags	= p,						\
		.bit_nr	= WAIT_ATOMIC_T_BIT_NR,				\
	}
|
|
|
|
|
2011-12-13 19:20:54 +07:00
|
|
|
/*
 * Out-of-line worker behind init_waitqueue_head(): takes the head to set
 * up, a name string and a lock_class_key.
 */
extern void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct lock_class_key *);
|
2009-08-10 18:33:05 +07:00
|
|
|
|
|
|
|
/*
 * Initialise the wait queue head @q at run time.  Each expansion site gets
 * its own static lock_class_key, and the source text of @q is passed along
 * as the name string.
 */
#define init_waitqueue_head(q)						\
	do {								\
		static struct lock_class_key __key;			\
									\
		__init_waitqueue_head((q), #q, &__key);			\
	} while (0)
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2006-10-30 13:46:36 +07:00
|
|
|
/*
 * DECLARE_WAIT_QUEUE_HEAD_ONSTACK() - define a wait queue head @name.
 *
 * Without CONFIG_LOCKDEP this is plain DECLARE_WAIT_QUEUE_HEAD().  With
 * CONFIG_LOCKDEP the variable is instead initialised at run time through
 * init_waitqueue_head().
 */
#ifndef CONFIG_LOCKDEP
# define DECLARE_WAIT_QUEUE_HEAD_ONSTACK(name) DECLARE_WAIT_QUEUE_HEAD(name)
#else
# define __WAIT_QUEUE_HEAD_INIT_ONSTACK(name) \
	({ init_waitqueue_head(&name); name; })
# define DECLARE_WAIT_QUEUE_HEAD_ONSTACK(name) \
	wait_queue_head_t name = __WAIT_QUEUE_HEAD_INIT_ONSTACK(name)
#endif
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/*
 * Set up wait-queue entry @q for task @p: the task pointer goes into
 * ->private, default_wake_function() becomes the wake callback and all
 * flags are cleared.  ->task_list is not touched.
 */
static inline void init_waitqueue_entry(wait_queue_t *q, struct task_struct *p)
{
	q->private	= p;
	q->func		= default_wake_function;
	q->flags	= 0;
}
|
|
|
|
|
2013-10-04 15:24:49 +07:00
|
|
|
static inline void
|
|
|
|
init_waitqueue_func_entry(wait_queue_t *q, wait_queue_func_t func)
|
2005-04-17 05:20:36 +07:00
|
|
|
{
|
2013-10-04 15:24:49 +07:00
|
|
|
q->flags = 0;
|
|
|
|
q->private = NULL;
|
|
|
|
q->func = func;
|
2005-04-17 05:20:36 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Report whether any entry is linked on @q: returns 1 if the list is
 * non-empty, 0 otherwise.  No lock is taken here.
 */
static inline int waitqueue_active(wait_queue_head_t *q)
{
	if (list_empty(&q->task_list))
		return 0;

	return 1;
}
|
|
|
|
|
2008-02-14 06:03:15 +07:00
|
|
|
/*
 * Out-of-line add/remove operations on a wait queue; the definitions live
 * outside this header.
 */
void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait);
void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait);
void remove_wait_queue(wait_queue_head_t *q, wait_queue_t *wait);
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/*
 * Link entry @new onto the queue @head using list_add().  Nothing is
 * locked here.
 * NOTE(review): presumably the caller holds head->lock - confirm.
 */
static inline void __add_wait_queue(wait_queue_head_t *head, wait_queue_t *new)
{
	struct list_head *queue = &head->task_list;

	list_add(&new->task_list, queue);
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Used for wake-one threads:
|
|
|
|
*/
|
2013-10-04 15:24:49 +07:00
|
|
|
static inline void
|
|
|
|
__add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait)
|
2010-05-07 13:33:26 +07:00
|
|
|
{
|
|
|
|
wait->flags |= WQ_FLAG_EXCLUSIVE;
|
|
|
|
__add_wait_queue(q, wait);
|
|
|
|
}
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
static inline void __add_wait_queue_tail(wait_queue_head_t *head,
|
2010-05-07 13:33:26 +07:00
|
|
|
wait_queue_t *new)
|
2005-04-17 05:20:36 +07:00
|
|
|
{
|
|
|
|
list_add_tail(&new->task_list, &head->task_list);
|
|
|
|
}
|
|
|
|
|
2013-10-04 15:24:49 +07:00
|
|
|
static inline void
|
|
|
|
__add_wait_queue_tail_exclusive(wait_queue_head_t *q, wait_queue_t *wait)
|
2010-05-07 13:33:26 +07:00
|
|
|
{
|
|
|
|
wait->flags |= WQ_FLAG_EXCLUSIVE;
|
|
|
|
__add_wait_queue_tail(q, wait);
|
|
|
|
}
|
|
|
|
|
2013-10-04 15:24:49 +07:00
|
|
|
static inline void
|
|
|
|
__remove_wait_queue(wait_queue_head_t *head, wait_queue_t *old)
|
2005-04-17 05:20:36 +07:00
|
|
|
{
|
|
|
|
list_del(&old->task_list);
|
|
|
|
}
|
|
|
|
|
2014-07-07 12:16:04 +07:00
|
|
|
/*
 * Function type of the action callback handed to the __wait_on_bit*() and
 * out_of_line_wait_on_bit*() helpers; it receives the wait_bit_key.
 */
typedef int wait_bit_action_f(struct wait_bit_key *);
|
2008-02-14 06:03:15 +07:00
|
|
|
/*
 * Wake-up entry point behind the wake_up*() macros in this header, which
 * pass a task-state mask as @mode, a count as @nr and an optional @key.
 */
void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key);
|
epoll keyed wakeups: add __wake_up_locked_key() and __wake_up_sync_key()
This patchset introduces wakeup hints for some of the most popular (from
epoll POV) devices, so that epoll code can avoid spurious wakeups on its
waiters.
The problem with epoll is that the callback-based wakeups do not, ATM,
carry any information about the events the wakeup is related to. So the
only choice epoll has (not being able to call f_op->poll() from inside the
callback), is to add the file* to a ready-list and resolve the real events
later on, at epoll_wait() (or its own f_op->poll()) time. This can cause
spurious wakeups, since the wake_up() itself might be for an event the
caller is not interested into.
The rate of these spurious wakeup can be pretty high in case of many
network sockets being monitored.
By allowing devices to report the events the wakeups refer to (at least
the two major classes - POLLIN/POLLOUT), we are able to spare useless
wakeups by proper handling inside the epoll's poll callback.
Epoll will have in any case to call f_op->poll() on the file* later on,
since the change to be done in order to have the full event set sent via
wakeup, is too invasive for the way our f_op->poll() system works (the
full event set is calculated inside the poll function - there are too many
of them to even start thinking the change - also poll/select would need
change too).
Epoll is changed in a way that both devices which send event hints, and
the ones that don't, are correctly handled. The former will gain some
efficiency though.
As a general rule for devices, would be to add an event mask by using
key-aware wakeup macros, when making up poll wait queues. I tested it
(together with the epoll's poll fix patch Andrew has in -mm) and wakeups
for the supported devices are correctly filtered.
Test program available here:
http://www.xmailserver.org/epoll_test.c
This patch:
Nothing revolutionary here. Just using the available "key" that our
wakeup core already support. The __wake_up_locked_key() was no brainer,
since both __wake_up_locked() and __wake_up_locked_key() are thin wrappers
around __wake_up_common().
The __wake_up_sync() function had a body, so the choice was between
borrowing the body for __wake_up_sync_key() and calling it from
__wake_up_sync(), or make an inline and calling it from both. I chose the
former since in most archs it all resolves to "mov $0, REG; jmp ADDR".
Signed-off-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: David Miller <davem@davemloft.net>
Cc: William Lee Irwin III <wli@movementarian.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2009-04-01 05:24:20 +07:00
|
|
|
/*
 * Wake-up variants behind wake_up_locked_poll(),
 * wake_up_interruptible_sync_poll() and wake_up_locked() /
 * wake_up_all_locked() respectively.
 */
void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key);
void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr,
			void *key);
void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr);
|
epoll keyed wakeups: add __wake_up_locked_key() and __wake_up_sync_key()
This patchset introduces wakeup hints for some of the most popular (from
epoll POV) devices, so that epoll code can avoid spurious wakeups on its
waiters.
The problem with epoll is that the callback-based wakeups do not, ATM,
carry any information about the events the wakeup is related to. So the
only choice epoll has (not being able to call f_op->poll() from inside the
callback), is to add the file* to a ready-list and resolve the real events
later on, at epoll_wait() (or its own f_op->poll()) time. This can cause
spurious wakeups, since the wake_up() itself might be for an event the
caller is not interested into.
The rate of these spurious wakeup can be pretty high in case of many
network sockets being monitored.
By allowing devices to report the events the wakeups refer to (at least
the two major classes - POLLIN/POLLOUT), we are able to spare useless
wakeups by proper handling inside the epoll's poll callback.
Epoll will have in any case to call f_op->poll() on the file* later on,
since the change to be done in order to have the full event set sent via
wakeup, is too invasive for the way our f_op->poll() system works (the
full event set is calculated inside the poll function - there are too many
of them to even start thinking the change - also poll/select would need
change too).
Epoll is changed in a way that both devices which send event hints, and
the ones that don't, are correctly handled. The former will gain some
efficiency though.
As a general rule for devices, would be to add an event mask by using
key-aware wakeup macros, when making up poll wait queues. I tested it
(together with the epoll's poll fix patch Andrew has in -mm) and wakeups
for the supported devices are correctly filtered.
Test program available here:
http://www.xmailserver.org/epoll_test.c
This patch:
Nothing revolutionary here. Just using the available "key" that our
wakeup core already support. The __wake_up_locked_key() was no brainer,
since both __wake_up_locked() and __wake_up_locked_key() are thin wrappers
around __wake_up_common().
The __wake_up_sync() function had a body, so the choice was between
borrowing the body for __wake_up_sync_key() and calling it from
__wake_up_sync(), or make an inline and calling it from both. I chose the
former since in most archs it all resolves to "mov $0, REG; jmp ADDR".
Signed-off-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: David Miller <davem@davemloft.net>
Cc: William Lee Irwin III <wli@movementarian.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2009-04-01 05:24:20 +07:00
|
|
|
/* Wake-up variant behind the wake_up_interruptible_sync() macro. */
void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr);
|
2008-02-14 06:03:15 +07:00
|
|
|
void __wake_up_bit(wait_queue_head_t *, void *, int);
|
2014-07-07 12:16:04 +07:00
|
|
|
int __wait_on_bit(wait_queue_head_t *, struct wait_bit_queue *, wait_bit_action_f *, unsigned);
|
|
|
|
int __wait_on_bit_lock(wait_queue_head_t *, struct wait_bit_queue *, wait_bit_action_f *, unsigned);
|
2008-02-14 06:03:15 +07:00
|
|
|
void wake_up_bit(void *, int);
|
Add wait_on_atomic_t() and wake_up_atomic_t()
Add wait_on_atomic_t() and wake_up_atomic_t() to indicate became-zero events on
atomic_t types. This uses the bit-wake waitqueue table. The key is set to a
value outside of the number of bits in a long so that wait_on_bit() won't be
woken up accidentally.
What I'm using this for is: in a following patch I add a counter to struct
fscache_cookie to count the number of outstanding operations that need access
to netfs data. The way this works is:
(1) When a cookie is allocated, the counter is initialised to 1.
(2) When an operation wants to access netfs data, it calls atomic_inc_unless()
to increment the counter before it does so. If it was 0, then the counter
isn't incremented, the operation isn't permitted to access the netfs data
(which might by this point no longer exist) and the operation aborts in
some appropriate manner.
(3) When an operation finishes with the netfs data, it decrements the counter
and if it reaches 0, calls wake_up_atomic_t() on it - the assumption being
that it was the last blocker.
(4) When a cookie is released, the counter is decremented and the releaser
uses wait_on_atomic_t() to wait for the counter to become 0 - which should
indicate no one is using the netfs data any longer. The netfs data can
then be destroyed.
There are some alternatives that I have thought of and that have been suggested
by Tejun Heo:
(A) Using wait_on_bit() to wait on a bit in the counter. This doesn't work
because if that bit happens to be 0 then the wait won't happen - even if
the counter is non-zero.
(B) Using wait_on_bit() to wait on a flag elsewhere which is cleared when the
counter reaches 0. Such a flag would be redundant and would add
complexity.
(C) Adding a waitqueue to fscache_cookie - this would expand that struct by
several words for an event that happens just once in each cookie's
lifetime. Further, cookies are generally per-file so there are likely to
be a lot of them.
(D) Similar to (C), but add a pointer to a waitqueue in the cookie instead of
a waitqueue. This would add single word per cookie and so would be less
of an expansion - but still an expansion.
(E) Adding a static waitqueue to the fscache module. Generally this would be
fine, but under certain circumstances many cookies will all get added at
the same time (eg. NFS umount, cache withdrawal) thereby presenting
scaling issues. Note that the wait may be significant as disk I/O may be
in progress.
So, I think reusing the wait_on_bit() waitqueue set is reasonable. I don't
make much use of the waitqueue I need on a per-cookie basis, but sometimes I
have a huge flood of the cookies to deal with.
I also don't want to add a whole new set of global waitqueue tables
specifically for the dec-to-0 event if I can reuse the bit tables.
Signed-off-by: David Howells <dhowells@redhat.com>
Tested-By: Milosz Tanski <milosz@adfin.com>
Acked-by: Jeff Layton <jlayton@redhat.com>
2013-05-11 01:50:26 +07:00
|
|
|
/* Wake-up counterpart for waiters keyed on an atomic_t. */
void wake_up_atomic_t(atomic_t *);
|
2014-07-07 12:16:04 +07:00
|
|
|
/*
 * Out-of-line bit waits.  Parameter names are descriptive only.
 * NOTE(review): confirm names against the definitions.
 */
int out_of_line_wait_on_bit(void *word, int bit, wait_bit_action_f *action,
			    unsigned mode);
int out_of_line_wait_on_bit_lock(void *word, int bit,
				 wait_bit_action_f *action, unsigned mode);
|
Add wait_on_atomic_t() and wake_up_atomic_t()
Add wait_on_atomic_t() and wake_up_atomic_t() to indicate became-zero events on
atomic_t types. This uses the bit-wake waitqueue table. The key is set to a
value outside of the number of bits in a long so that wait_on_bit() won't be
woken up accidentally.
What I'm using this for is: in a following patch I add a counter to struct
fscache_cookie to count the number of outstanding operations that need access
to netfs data. The way this works is:
(1) When a cookie is allocated, the counter is initialised to 1.
(2) When an operation wants to access netfs data, it calls atomic_inc_unless()
to increment the counter before it does so. If it was 0, then the counter
isn't incremented, the operation isn't permitted to access the netfs data
(which might by this point no longer exist) and the operation aborts in
some appropriate manner.
(3) When an operation finishes with the netfs data, it decrements the counter
and if it reaches 0, calls wake_up_atomic_t() on it - the assumption being
that it was the last blocker.
(4) When a cookie is released, the counter is decremented and the releaser
uses wait_on_atomic_t() to wait for the counter to become 0 - which should
indicate no one is using the netfs data any longer. The netfs data can
then be destroyed.
There are some alternatives that I have thought of and that have been suggested
by Tejun Heo:
(A) Using wait_on_bit() to wait on a bit in the counter. This doesn't work
because if that bit happens to be 0 then the wait won't happen - even if
the counter is non-zero.
(B) Using wait_on_bit() to wait on a flag elsewhere which is cleared when the
counter reaches 0. Such a flag would be redundant and would add
complexity.
(C) Adding a waitqueue to fscache_cookie - this would expand that struct by
several words for an event that happens just once in each cookie's
lifetime. Further, cookies are generally per-file so there are likely to
be a lot of them.
(D) Similar to (C), but add a pointer to a waitqueue in the cookie instead of
a waitqueue. This would add single word per cookie and so would be less
of an expansion - but still an expansion.
(E) Adding a static waitqueue to the fscache module. Generally this would be
fine, but under certain circumstances many cookies will all get added at
the same time (eg. NFS umount, cache withdrawal) thereby presenting
scaling issues. Note that the wait may be significant as disk I/O may be
in progress.
So, I think reusing the wait_on_bit() waitqueue set is reasonable. I don't
make much use of the waitqueue I need on a per-cookie basis, but sometimes I
have a huge flood of the cookies to deal with.
I also don't want to add a whole new set of global waitqueue tables
specifically for the dec-to-0 event if I can reuse the bit tables.
Signed-off-by: David Howells <dhowells@redhat.com>
Tested-By: Milosz Tanski <milosz@adfin.com>
Acked-by: Jeff Layton <jlayton@redhat.com>
2013-05-11 01:50:26 +07:00
|
|
|
int out_of_line_wait_on_atomic_t(atomic_t *, int (*)(atomic_t *), unsigned);
|
2008-02-14 06:03:15 +07:00
|
|
|
wait_queue_head_t *bit_waitqueue(void *, int);
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2007-12-07 05:34:36 +07:00
|
|
|
/*
 * Convenience wrappers around __wake_up(), __wake_up_locked() and
 * __wake_up_sync().
 *
 * The plain variants pass TASK_NORMAL as the mode, the *_interruptible
 * variants pass TASK_INTERRUPTIBLE.  The count argument is 1 by default,
 * @nr for the *_nr variants and 0 for the *_all variants.
 */
#define wake_up(q)			__wake_up(q, TASK_NORMAL, 1, NULL)
#define wake_up_nr(q, nr)		__wake_up(q, TASK_NORMAL, nr, NULL)
#define wake_up_all(q)			__wake_up(q, TASK_NORMAL, 0, NULL)
#define wake_up_locked(q)		__wake_up_locked((q), TASK_NORMAL, 1)
#define wake_up_all_locked(q)		__wake_up_locked((q), TASK_NORMAL, 0)

#define wake_up_interruptible(q)	__wake_up(q, TASK_INTERRUPTIBLE, 1, NULL)
#define wake_up_interruptible_nr(q, nr)	__wake_up(q, TASK_INTERRUPTIBLE, nr, NULL)
#define wake_up_interruptible_all(q)	__wake_up(q, TASK_INTERRUPTIBLE, 0, NULL)
#define wake_up_interruptible_sync(q)	__wake_up_sync((q), TASK_INTERRUPTIBLE, 1)
|
2005-04-17 05:20:36 +07:00
|
|
|
|
lockdep: annotate epoll
On Sat, 2008-01-05 at 13:35 -0800, Davide Libenzi wrote:
> I remember I talked with Arjan about this time ago. Basically, since 1)
> you can drop an epoll fd inside another epoll fd 2) callback-based wakeups
> are used, you can see a wake_up() from inside another wake_up(), but they
> will never refer to the same lock instance.
> Think about:
>
> dfd = socket(...);
> efd1 = epoll_create();
> efd2 = epoll_create();
> epoll_ctl(efd1, EPOLL_CTL_ADD, dfd, ...);
> epoll_ctl(efd2, EPOLL_CTL_ADD, efd1, ...);
>
> When a packet arrives to the device underneath "dfd", the net code will
> issue a wake_up() on its poll wake list. Epoll (efd1) has installed a
> callback wakeup entry on that queue, and the wake_up() performed by the
> "dfd" net code will end up in ep_poll_callback(). At this point epoll
> (efd1) notices that it may have some event ready, so it needs to wake up
> the waiters on its poll wait list (efd2). So it calls ep_poll_safewake()
> that ends up in another wake_up(), after having checked about the
> recursion constraints. That are, no more than EP_MAX_POLLWAKE_NESTS, to
> avoid stack blasting. Never hit the same queue, to avoid loops like:
>
> epoll_ctl(efd2, EPOLL_CTL_ADD, efd1, ...);
> epoll_ctl(efd3, EPOLL_CTL_ADD, efd2, ...);
> epoll_ctl(efd4, EPOLL_CTL_ADD, efd3, ...);
> epoll_ctl(efd1, EPOLL_CTL_ADD, efd4, ...);
>
> The code "if (tncur->wq == wq || ..." prevents re-entering the same
> queue/lock.
Since the epoll code is very careful to not nest same instance locks
allow the recursion.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Tested-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-02-05 13:27:20 +07:00
|
|
|
/*
|
2009-04-01 05:24:20 +07:00
|
|
|
* Wakeup macros to be used to report events to the targets.
|
lockdep: annotate epoll
On Sat, 2008-01-05 at 13:35 -0800, Davide Libenzi wrote:
> I remember I talked with Arjan about this time ago. Basically, since 1)
> you can drop an epoll fd inside another epoll fd 2) callback-based wakeups
> are used, you can see a wake_up() from inside another wake_up(), but they
> will never refer to the same lock instance.
> Think about:
>
> dfd = socket(...);
> efd1 = epoll_create();
> efd2 = epoll_create();
> epoll_ctl(efd1, EPOLL_CTL_ADD, dfd, ...);
> epoll_ctl(efd2, EPOLL_CTL_ADD, efd1, ...);
>
> When a packet arrives to the device underneath "dfd", the net code will
> issue a wake_up() on its poll wake list. Epoll (efd1) has installed a
> callback wakeup entry on that queue, and the wake_up() performed by the
> "dfd" net code will end up in ep_poll_callback(). At this point epoll
> (efd1) notices that it may have some event ready, so it needs to wake up
> the waiters on its poll wait list (efd2). So it calls ep_poll_safewake()
> that ends up in another wake_up(), after having checked about the
> recursion constraints. That are, no more than EP_MAX_POLLWAKE_NESTS, to
> avoid stack blasting. Never hit the same queue, to avoid loops like:
>
> epoll_ctl(efd2, EPOLL_CTL_ADD, efd1, ...);
> epoll_ctl(efd3, EPOLL_CTL_ADD, efd2, ...);
> epoll_ctl(efd4, EPOLL_CTL_ADD, efd3, ...);
> epoll_ctl(efd1, EPOLL_CTL_ADD, efd4, ...);
>
> The code "if (tncur->wq == wq || ..." prevents re-entering the same
> queue/lock.
Since the epoll code is very careful to not nest same instance locks
allow the recursion.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Tested-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-02-05 13:27:20 +07:00
|
|
|
*/
|
2013-10-04 15:24:49 +07:00
|
|
|
/*
 * Keyed wake-ups: like the plain wrappers with a count of 1, but the
 * mask @m is cast to void * and passed along as the wake-up key.
 */
#define wake_up_poll(q, m) \
	__wake_up(q, TASK_NORMAL, 1, (void *) (m))
#define wake_up_locked_poll(q, m) \
	__wake_up_locked_key((q), TASK_NORMAL, (void *) (m))
#define wake_up_interruptible_poll(q, m) \
	__wake_up(q, TASK_INTERRUPTIBLE, 1, (void *) (m))
#define wake_up_interruptible_sync_poll(q, m) \
	__wake_up_sync_key((q), TASK_INTERRUPTIBLE, 1, (void *) (m))
|
lockdep: annotate epoll
On Sat, 2008-01-05 at 13:35 -0800, Davide Libenzi wrote:
> I remember I talked with Arjan about this time ago. Basically, since 1)
> you can drop an epoll fd inside another epoll fd 2) callback-based wakeups
> are used, you can see a wake_up() from inside another wake_up(), but they
> will never refer to the same lock instance.
> Think about:
>
> dfd = socket(...);
> efd1 = epoll_create();
> efd2 = epoll_create();
> epoll_ctl(efd1, EPOLL_CTL_ADD, dfd, ...);
> epoll_ctl(efd2, EPOLL_CTL_ADD, efd1, ...);
>
> When a packet arrives to the device underneath "dfd", the net code will
> issue a wake_up() on its poll wake list. Epoll (efd1) has installed a
> callback wakeup entry on that queue, and the wake_up() performed by the
> "dfd" net code will end up in ep_poll_callback(). At this point epoll
> (efd1) notices that it may have some event ready, so it needs to wake up
> the waiters on its poll wait list (efd2). So it calls ep_poll_safewake()
> that ends up in another wake_up(), after having checked about the
> recursion constraints. That are, no more than EP_MAX_POLLWAKE_NESTS, to
> avoid stack blasting. Never hit the same queue, to avoid loops like:
>
> epoll_ctl(efd2, EPOLL_CTL_ADD, efd1, ...);
> epoll_ctl(efd3, EPOLL_CTL_ADD, efd2, ...);
> epoll_ctl(efd4, EPOLL_CTL_ADD, efd3, ...);
> epoll_ctl(efd1, EPOLL_CTL_ADD, efd4, ...);
>
> The code "if (tncur->wq == wq || ..." prevents re-entering the same
> queue/lock.
Since the epoll code is very careful to not nest same instance locks
allow the recursion.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Tested-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-02-05 13:27:20 +07:00
|
|
|
|
2013-10-02 16:22:33 +07:00
|
|
|
/*
 * Wrap @condition for the timeout-style waits.  This relies on a variable
 * named __ret being in scope at the expansion site.
 *
 * @condition is evaluated exactly once.  If it is true while __ret is 0,
 * __ret is set to 1.  The whole expression is true when the condition
 * holds or when __ret is 0.
 */
#define ___wait_cond_timeout(condition)					\
({									\
	bool __cond = (condition);					\
									\
	if (!__ret && __cond)						\
		__ret = 1;						\
	!__ret || __cond;						\
})
|
|
|
|
|
2013-10-07 23:18:24 +07:00
|
|
|
/*
 * Decide whether a wait in task state @state can be interrupted.
 *
 * A @state that is not a compile-time constant is conservatively treated
 * as interruptible; a constant one is interruptible only if it equals
 * TASK_INTERRUPTIBLE or TASK_KILLABLE.
 *
 * @state is parenthesized so that compound arguments such as "a | b" are
 * compared as a whole, and the definition ends without a trailing line
 * continuation so that it cannot swallow the line that follows it.
 */
#define ___wait_is_interruptible(state)					\
	(!__builtin_constant_p(state) ||				\
	 (state) == TASK_INTERRUPTIBLE || (state) == TASK_KILLABLE)
|
2013-10-02 16:22:21 +07:00
|
|
|
|
2014-04-19 05:07:17 +07:00
|
|
|
/*
|
|
|
|
* The below macro ___wait_event() has an explicit shadow of the __ret
|
|
|
|
* variable when used from the wait_event_*() macros.
|
|
|
|
*
|
|
|
|
* This is so that both can use the ___wait_cond_timeout() construct
|
|
|
|
* to wrap the condition.
|
|
|
|
*
|
|
|
|
* The type inconsistency of the wait_event_*() __ret variable is also
|
|
|
|
* on purpose; we use long where we can return timeout values and int
|
|
|
|
* otherwise.
|
|
|
|
*/
|
|
|
|
|
2013-10-02 16:22:21 +07:00
|
|
|
/*
 * Core wait loop shared by the wait_event*() family.
 *
 * @wq:        wait queue head (its address is taken by the macro)
 * @condition: expression re-evaluated on every pass of the loop
 * @state:     task state handed to prepare_to_wait_event()
 * @exclusive: non-zero to queue as a WQ_FLAG_EXCLUSIVE waiter
 * @ret:       initial value of the result
 * @cmd:       statement(s) run on each pass while still waiting
 *
 * Evaluates to @ret (as possibly modified by @condition or @cmd), or to
 * the non-zero value returned by prepare_to_wait_event() when the wait is
 * interruptible.  In that case an exclusive waiter is backed out with
 * abort_exclusive_wait() and skips finish_wait().
 */
#define ___wait_event(wq, condition, state, exclusive, ret, cmd)	\
({									\
	__label__ __out;						\
	wait_queue_t __wait;						\
	long __ret = ret;	/* explicit shadow */			\
									\
	INIT_LIST_HEAD(&__wait.task_list);				\
	__wait.flags = (exclusive) ? WQ_FLAG_EXCLUSIVE : 0;		\
									\
	for (;;) {							\
		long __int = prepare_to_wait_event(&wq, &__wait, state);\
									\
		if (condition)						\
			break;						\
									\
		if (___wait_is_interruptible(state) && __int) {		\
			__ret = __int;					\
			if (!(exclusive))				\
				break;					\
			abort_exclusive_wait(&wq, &__wait,		\
					     state, NULL);		\
			goto __out;					\
		}							\
									\
		cmd;							\
	}								\
	finish_wait(&wq, &__wait);					\
__out:	__ret;								\
})
|
2013-10-02 16:22:21 +07:00
|
|
|
|
2013-10-04 15:24:49 +07:00
|
|
|
/*
 * Slow path of wait_event(): a non-exclusive TASK_UNINTERRUPTIBLE wait
 * that calls schedule() on each pass.  The result is discarded.
 */
#define __wait_event(wq, condition)					\
	(void)___wait_event(wq, condition,				\
			    TASK_UNINTERRUPTIBLE, 0, 0,			\
			    schedule())
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/**
 * wait_event - sleep until a condition becomes true
 * @wq: the waitqueue to wait on
 * @condition: a C expression for the event to wait for
 *
 * If @condition already holds, nothing is done.  Otherwise the process is
 * put to sleep (TASK_UNINTERRUPTIBLE) until @condition evaluates to true.
 * The @condition is checked each time the waitqueue @wq is woken up.
 *
 * wake_up() has to be called after changing any variable that could
 * change the result of the wait condition.
 */
#define wait_event(wq, condition)					\
do {									\
	if (!(condition))						\
		__wait_event(wq, condition);				\
} while (0)
|
|
|
|
|
2013-10-02 16:22:33 +07:00
|
|
|
#define __wait_event_timeout(wq, condition, timeout) \
|
|
|
|
___wait_event(wq, ___wait_cond_timeout(condition), \
|
|
|
|
TASK_UNINTERRUPTIBLE, 0, timeout, \
|
|
|
|
__ret = schedule_timeout(__ret))
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/**
|
|
|
|
* wait_event_timeout - sleep until a condition gets true or a timeout elapses
|
|
|
|
* @wq: the waitqueue to wait on
|
|
|
|
* @condition: a C expression for the event to wait for
|
|
|
|
* @timeout: timeout, in jiffies
|
|
|
|
*
|
|
|
|
* The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
|
|
|
|
* @condition evaluates to true. The @condition is checked each time
|
|
|
|
* the waitqueue @wq is woken up.
|
|
|
|
*
|
|
|
|
* wake_up() has to be called after changing any variable that could
|
|
|
|
* change the result of the wait condition.
|
|
|
|
*
|
2013-05-25 05:55:09 +07:00
|
|
|
* The function returns 0 if the @timeout elapsed, or the remaining
|
|
|
|
* jiffies (at least 1) if the @condition evaluated to %true before
|
|
|
|
* the @timeout elapsed.
|
2005-04-17 05:20:36 +07:00
|
|
|
*/
|
|
|
|
#define wait_event_timeout(wq, condition, timeout) \
|
|
|
|
({ \
|
|
|
|
long __ret = timeout; \
|
2013-10-08 01:31:06 +07:00
|
|
|
if (!___wait_cond_timeout(condition)) \
|
2013-10-02 16:22:33 +07:00
|
|
|
__ret = __wait_event_timeout(wq, condition, timeout); \
|
2005-04-17 05:20:36 +07:00
|
|
|
__ret; \
|
|
|
|
})
|
|
|
|
|
2013-11-14 11:16:16 +07:00
|
|
|
#define __wait_event_cmd(wq, condition, cmd1, cmd2) \
|
|
|
|
(void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0, \
|
|
|
|
cmd1; schedule(); cmd2)
|
|
|
|
|
|
|
|
/**
|
|
|
|
* wait_event_cmd - sleep until a condition gets true
|
|
|
|
* @wq: the waitqueue to wait on
|
|
|
|
* @condition: a C expression for the event to wait for
|
2014-01-21 23:22:06 +07:00
|
|
|
* @cmd1: the command will be executed before sleep
|
|
|
|
* @cmd2: the command will be executed after sleep
|
2013-11-14 11:16:16 +07:00
|
|
|
*
|
|
|
|
* The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
|
|
|
|
* @condition evaluates to true. The @condition is checked each time
|
|
|
|
* the waitqueue @wq is woken up.
|
|
|
|
*
|
|
|
|
* wake_up() has to be called after changing any variable that could
|
|
|
|
* change the result of the wait condition.
|
|
|
|
*/
|
|
|
|
#define wait_event_cmd(wq, condition, cmd1, cmd2) \
|
|
|
|
do { \
|
|
|
|
if (condition) \
|
|
|
|
break; \
|
|
|
|
__wait_event_cmd(wq, condition, cmd1, cmd2); \
|
|
|
|
} while (0)
|
|
|
|
|
2013-10-02 16:22:33 +07:00
|
|
|
#define __wait_event_interruptible(wq, condition) \
|
|
|
|
___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0, \
|
2013-10-02 16:22:24 +07:00
|
|
|
schedule())
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/**
|
|
|
|
* wait_event_interruptible - sleep until a condition gets true
|
|
|
|
* @wq: the waitqueue to wait on
|
|
|
|
* @condition: a C expression for the event to wait for
|
|
|
|
*
|
|
|
|
* The process is put to sleep (TASK_INTERRUPTIBLE) until the
|
|
|
|
* @condition evaluates to true or a signal is received.
|
|
|
|
* The @condition is checked each time the waitqueue @wq is woken up.
|
|
|
|
*
|
|
|
|
* wake_up() has to be called after changing any variable that could
|
|
|
|
* change the result of the wait condition.
|
|
|
|
*
|
|
|
|
* The function will return -ERESTARTSYS if it was interrupted by a
|
|
|
|
* signal and 0 if @condition evaluated to true.
|
|
|
|
*/
|
|
|
|
#define wait_event_interruptible(wq, condition) \
|
|
|
|
({ \
|
|
|
|
int __ret = 0; \
|
|
|
|
if (!(condition)) \
|
2013-10-02 16:22:33 +07:00
|
|
|
__ret = __wait_event_interruptible(wq, condition); \
|
2005-04-17 05:20:36 +07:00
|
|
|
__ret; \
|
|
|
|
})
|
|
|
|
|
2013-10-02 16:22:33 +07:00
|
|
|
#define __wait_event_interruptible_timeout(wq, condition, timeout) \
|
|
|
|
___wait_event(wq, ___wait_cond_timeout(condition), \
|
|
|
|
TASK_INTERRUPTIBLE, 0, timeout, \
|
|
|
|
__ret = schedule_timeout(__ret))
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/**
|
|
|
|
* wait_event_interruptible_timeout - sleep until a condition gets true or a timeout elapses
|
|
|
|
* @wq: the waitqueue to wait on
|
|
|
|
* @condition: a C expression for the event to wait for
|
|
|
|
* @timeout: timeout, in jiffies
|
|
|
|
*
|
|
|
|
* The process is put to sleep (TASK_INTERRUPTIBLE) until the
|
|
|
|
* @condition evaluates to true or a signal is received.
|
|
|
|
* The @condition is checked each time the waitqueue @wq is woken up.
|
|
|
|
*
|
|
|
|
* wake_up() has to be called after changing any variable that could
|
|
|
|
* change the result of the wait condition.
|
|
|
|
*
|
2013-05-25 05:55:09 +07:00
|
|
|
* Returns:
|
|
|
|
* 0 if the @timeout elapsed, -%ERESTARTSYS if it was interrupted by
|
|
|
|
* a signal, or the remaining jiffies (at least 1) if the @condition
|
|
|
|
* evaluated to %true before the @timeout elapsed.
|
2005-04-17 05:20:36 +07:00
|
|
|
*/
|
|
|
|
#define wait_event_interruptible_timeout(wq, condition, timeout) \
|
|
|
|
({ \
|
|
|
|
long __ret = timeout; \
|
2013-10-08 01:31:06 +07:00
|
|
|
if (!___wait_cond_timeout(condition)) \
|
2013-10-04 15:24:49 +07:00
|
|
|
__ret = __wait_event_interruptible_timeout(wq, \
|
2013-10-02 16:22:33 +07:00
|
|
|
condition, timeout); \
|
2005-04-17 05:20:36 +07:00
|
|
|
__ret; \
|
|
|
|
})
|
|
|
|
|
2013-05-08 06:18:43 +07:00
|
|
|
#define __wait_event_hrtimeout(wq, condition, timeout, state) \
|
|
|
|
({ \
|
|
|
|
int __ret = 0; \
|
|
|
|
struct hrtimer_sleeper __t; \
|
|
|
|
\
|
|
|
|
hrtimer_init_on_stack(&__t.timer, CLOCK_MONOTONIC, \
|
|
|
|
HRTIMER_MODE_REL); \
|
|
|
|
hrtimer_init_sleeper(&__t, current); \
|
|
|
|
if ((timeout).tv64 != KTIME_MAX) \
|
|
|
|
hrtimer_start_range_ns(&__t.timer, timeout, \
|
|
|
|
current->timer_slack_ns, \
|
|
|
|
HRTIMER_MODE_REL); \
|
|
|
|
\
|
2013-10-02 16:22:33 +07:00
|
|
|
__ret = ___wait_event(wq, condition, state, 0, 0, \
|
2013-05-08 06:18:43 +07:00
|
|
|
if (!__t.task) { \
|
|
|
|
__ret = -ETIME; \
|
|
|
|
break; \
|
|
|
|
} \
|
2013-10-02 16:22:32 +07:00
|
|
|
schedule()); \
|
2013-05-08 06:18:43 +07:00
|
|
|
\
|
|
|
|
hrtimer_cancel(&__t.timer); \
|
|
|
|
destroy_hrtimer_on_stack(&__t.timer); \
|
|
|
|
__ret; \
|
|
|
|
})
|
|
|
|
|
|
|
|
/**
|
|
|
|
* wait_event_hrtimeout - sleep until a condition gets true or a timeout elapses
|
|
|
|
* @wq: the waitqueue to wait on
|
|
|
|
* @condition: a C expression for the event to wait for
|
|
|
|
* @timeout: timeout, as a ktime_t
|
|
|
|
*
|
|
|
|
* The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
|
|
|
|
* @condition evaluates to true or the @timeout elapses.
|
|
|
|
* The @condition is checked each time the waitqueue @wq is woken up.
|
|
|
|
*
|
|
|
|
* wake_up() has to be called after changing any variable that could
|
|
|
|
* change the result of the wait condition.
|
|
|
|
*
|
|
|
|
* The function returns 0 if @condition became true, or -ETIME if the timeout
|
|
|
|
* elapsed.
|
|
|
|
*/
|
|
|
|
#define wait_event_hrtimeout(wq, condition, timeout) \
|
|
|
|
({ \
|
|
|
|
int __ret = 0; \
|
|
|
|
if (!(condition)) \
|
|
|
|
__ret = __wait_event_hrtimeout(wq, condition, timeout, \
|
|
|
|
TASK_UNINTERRUPTIBLE); \
|
|
|
|
__ret; \
|
|
|
|
})
|
|
|
|
|
|
|
|
/**
|
|
|
|
* wait_event_interruptible_hrtimeout - sleep until a condition gets true or a timeout elapses
|
|
|
|
* @wq: the waitqueue to wait on
|
|
|
|
* @condition: a C expression for the event to wait for
|
|
|
|
* @timeout: timeout, as a ktime_t
|
|
|
|
*
|
|
|
|
* The process is put to sleep (TASK_INTERRUPTIBLE) until the
|
|
|
|
* @condition evaluates to true or a signal is received.
|
|
|
|
* The @condition is checked each time the waitqueue @wq is woken up.
|
|
|
|
*
|
|
|
|
* wake_up() has to be called after changing any variable that could
|
|
|
|
* change the result of the wait condition.
|
|
|
|
*
|
|
|
|
* The function returns 0 if @condition became true, -ERESTARTSYS if it was
|
|
|
|
* interrupted by a signal, or -ETIME if the timeout elapsed.
|
|
|
|
*/
|
|
|
|
#define wait_event_interruptible_hrtimeout(wq, condition, timeout) \
|
|
|
|
({ \
|
|
|
|
long __ret = 0; \
|
|
|
|
if (!(condition)) \
|
|
|
|
__ret = __wait_event_hrtimeout(wq, condition, timeout, \
|
|
|
|
TASK_INTERRUPTIBLE); \
|
|
|
|
__ret; \
|
|
|
|
})
|
|
|
|
|
2013-10-02 16:22:33 +07:00
|
|
|
#define __wait_event_interruptible_exclusive(wq, condition) \
|
|
|
|
___wait_event(wq, condition, TASK_INTERRUPTIBLE, 1, 0, \
|
2013-10-02 16:22:26 +07:00
|
|
|
schedule())
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
#define wait_event_interruptible_exclusive(wq, condition) \
|
|
|
|
({ \
|
|
|
|
int __ret = 0; \
|
|
|
|
if (!(condition)) \
|
2013-10-02 16:22:33 +07:00
|
|
|
__ret = __wait_event_interruptible_exclusive(wq, condition);\
|
2005-04-17 05:20:36 +07:00
|
|
|
__ret; \
|
|
|
|
})
|
|
|
|
|
2010-05-05 17:53:11 +07:00
|
|
|
|
|
|
|
#define __wait_event_interruptible_locked(wq, condition, exclusive, irq) \
|
|
|
|
({ \
|
|
|
|
int __ret = 0; \
|
|
|
|
DEFINE_WAIT(__wait); \
|
|
|
|
if (exclusive) \
|
|
|
|
__wait.flags |= WQ_FLAG_EXCLUSIVE; \
|
|
|
|
do { \
|
|
|
|
if (likely(list_empty(&__wait.task_list))) \
|
|
|
|
__add_wait_queue_tail(&(wq), &__wait); \
|
|
|
|
set_current_state(TASK_INTERRUPTIBLE); \
|
|
|
|
if (signal_pending(current)) { \
|
|
|
|
__ret = -ERESTARTSYS; \
|
|
|
|
break; \
|
|
|
|
} \
|
|
|
|
if (irq) \
|
|
|
|
spin_unlock_irq(&(wq).lock); \
|
|
|
|
else \
|
|
|
|
spin_unlock(&(wq).lock); \
|
|
|
|
schedule(); \
|
|
|
|
if (irq) \
|
|
|
|
spin_lock_irq(&(wq).lock); \
|
|
|
|
else \
|
|
|
|
spin_lock(&(wq).lock); \
|
|
|
|
} while (!(condition)); \
|
|
|
|
__remove_wait_queue(&(wq), &__wait); \
|
|
|
|
__set_current_state(TASK_RUNNING); \
|
|
|
|
__ret; \
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* wait_event_interruptible_locked - sleep until a condition gets true
|
|
|
|
* @wq: the waitqueue to wait on
|
|
|
|
* @condition: a C expression for the event to wait for
|
|
|
|
*
|
|
|
|
* The process is put to sleep (TASK_INTERRUPTIBLE) until the
|
|
|
|
* @condition evaluates to true or a signal is received.
|
|
|
|
* The @condition is checked each time the waitqueue @wq is woken up.
|
|
|
|
*
|
|
|
|
* It must be called with wq.lock being held. This spinlock is
|
|
|
|
* unlocked while sleeping but @condition testing is done while lock
|
|
|
|
* is held and when this macro exits the lock is held.
|
|
|
|
*
|
|
|
|
* The lock is locked/unlocked using spin_lock()/spin_unlock()
|
|
|
|
* functions which must match the way they are locked/unlocked outside
|
|
|
|
* of this macro.
|
|
|
|
*
|
|
|
|
* wake_up_locked() has to be called after changing any variable that could
|
|
|
|
* change the result of the wait condition.
|
|
|
|
*
|
|
|
|
* The function will return -ERESTARTSYS if it was interrupted by a
|
|
|
|
* signal and 0 if @condition evaluated to true.
|
|
|
|
*/
|
|
|
|
#define wait_event_interruptible_locked(wq, condition) \
|
|
|
|
((condition) \
|
|
|
|
? 0 : __wait_event_interruptible_locked(wq, condition, 0, 0))
|
|
|
|
|
|
|
|
/**
|
|
|
|
* wait_event_interruptible_locked_irq - sleep until a condition gets true
|
|
|
|
* @wq: the waitqueue to wait on
|
|
|
|
* @condition: a C expression for the event to wait for
|
|
|
|
*
|
|
|
|
* The process is put to sleep (TASK_INTERRUPTIBLE) until the
|
|
|
|
* @condition evaluates to true or a signal is received.
|
|
|
|
* The @condition is checked each time the waitqueue @wq is woken up.
|
|
|
|
*
|
|
|
|
* It must be called with wq.lock being held. This spinlock is
|
|
|
|
* unlocked while sleeping but @condition testing is done while lock
|
|
|
|
* is held and when this macro exits the lock is held.
|
|
|
|
*
|
|
|
|
* The lock is locked/unlocked using spin_lock_irq()/spin_unlock_irq()
|
|
|
|
* functions which must match the way they are locked/unlocked outside
|
|
|
|
* of this macro.
|
|
|
|
*
|
|
|
|
* wake_up_locked() has to be called after changing any variable that could
|
|
|
|
* change the result of the wait condition.
|
|
|
|
*
|
|
|
|
* The function will return -ERESTARTSYS if it was interrupted by a
|
|
|
|
* signal and 0 if @condition evaluated to true.
|
|
|
|
*/
|
|
|
|
#define wait_event_interruptible_locked_irq(wq, condition) \
|
|
|
|
((condition) \
|
|
|
|
? 0 : __wait_event_interruptible_locked(wq, condition, 0, 1))
|
|
|
|
|
|
|
|
/**
|
|
|
|
* wait_event_interruptible_exclusive_locked - sleep exclusively until a condition gets true
|
|
|
|
* @wq: the waitqueue to wait on
|
|
|
|
* @condition: a C expression for the event to wait for
|
|
|
|
*
|
|
|
|
* The process is put to sleep (TASK_INTERRUPTIBLE) until the
|
|
|
|
* @condition evaluates to true or a signal is received.
|
|
|
|
* The @condition is checked each time the waitqueue @wq is woken up.
|
|
|
|
*
|
|
|
|
* It must be called with wq.lock being held. This spinlock is
|
|
|
|
* unlocked while sleeping but @condition testing is done while lock
|
|
|
|
* is held and when this macro exits the lock is held.
|
|
|
|
*
|
|
|
|
* The lock is locked/unlocked using spin_lock()/spin_unlock()
|
|
|
|
* functions which must match the way they are locked/unlocked outside
|
|
|
|
* of this macro.
|
|
|
|
*
|
|
|
|
* The process is put on the wait queue with a WQ_FLAG_EXCLUSIVE flag
|
|
|
|
* set; thus, when other processes wait on the list, if this
|
|
|
|
* process is awakened, further processes are not considered.
|
|
|
|
*
|
|
|
|
* wake_up_locked() has to be called after changing any variable that could
|
|
|
|
* change the result of the wait condition.
|
|
|
|
*
|
|
|
|
* The function will return -ERESTARTSYS if it was interrupted by a
|
|
|
|
* signal and 0 if @condition evaluated to true.
|
|
|
|
*/
|
|
|
|
#define wait_event_interruptible_exclusive_locked(wq, condition) \
|
|
|
|
((condition) \
|
|
|
|
? 0 : __wait_event_interruptible_locked(wq, condition, 1, 0))
|
|
|
|
|
|
|
|
/**
|
|
|
|
* wait_event_interruptible_exclusive_locked_irq - sleep exclusively until a condition gets true
|
|
|
|
* @wq: the waitqueue to wait on
|
|
|
|
* @condition: a C expression for the event to wait for
|
|
|
|
*
|
|
|
|
* The process is put to sleep (TASK_INTERRUPTIBLE) until the
|
|
|
|
* @condition evaluates to true or a signal is received.
|
|
|
|
* The @condition is checked each time the waitqueue @wq is woken up.
|
|
|
|
*
|
|
|
|
* It must be called with wq.lock being held. This spinlock is
|
|
|
|
* unlocked while sleeping but @condition testing is done while lock
|
|
|
|
* is held and when this macro exits the lock is held.
|
|
|
|
*
|
|
|
|
* The lock is locked/unlocked using spin_lock_irq()/spin_unlock_irq()
|
|
|
|
* functions which must match the way they are locked/unlocked outside
|
|
|
|
* of this macro.
|
|
|
|
*
|
|
|
|
* The process is put on the wait queue with a WQ_FLAG_EXCLUSIVE flag
|
|
|
|
* set; thus, when other processes wait on the list, if this
|
|
|
|
* process is awakened, further processes are not considered.
|
|
|
|
*
|
|
|
|
* wake_up_locked() has to be called after changing any variable that could
|
|
|
|
* change the result of the wait condition.
|
|
|
|
*
|
|
|
|
* The function will return -ERESTARTSYS if it was interrupted by a
|
|
|
|
* signal and 0 if @condition evaluated to true.
|
|
|
|
*/
|
|
|
|
#define wait_event_interruptible_exclusive_locked_irq(wq, condition) \
|
|
|
|
((condition) \
|
|
|
|
? 0 : __wait_event_interruptible_locked(wq, condition, 1, 1))
|
|
|
|
|
|
|
|
|
2013-10-02 16:22:33 +07:00
|
|
|
#define __wait_event_killable(wq, condition) \
|
|
|
|
___wait_event(wq, condition, TASK_KILLABLE, 0, 0, schedule())
|
2007-12-07 00:00:00 +07:00
|
|
|
|
|
|
|
/**
|
|
|
|
* wait_event_killable - sleep until a condition gets true
|
|
|
|
* @wq: the waitqueue to wait on
|
|
|
|
* @condition: a C expression for the event to wait for
|
|
|
|
*
|
|
|
|
* The process is put to sleep (TASK_KILLABLE) until the
|
|
|
|
* @condition evaluates to true or a signal is received.
|
|
|
|
* The @condition is checked each time the waitqueue @wq is woken up.
|
|
|
|
*
|
|
|
|
* wake_up() has to be called after changing any variable that could
|
|
|
|
* change the result of the wait condition.
|
|
|
|
*
|
|
|
|
* The function will return -ERESTARTSYS if it was interrupted by a
|
|
|
|
* signal and 0 if @condition evaluated to true.
|
|
|
|
*/
|
|
|
|
#define wait_event_killable(wq, condition) \
|
|
|
|
({ \
|
|
|
|
int __ret = 0; \
|
|
|
|
if (!(condition)) \
|
2013-10-02 16:22:33 +07:00
|
|
|
__ret = __wait_event_killable(wq, condition); \
|
2007-12-07 00:00:00 +07:00
|
|
|
__ret; \
|
|
|
|
})
|
|
|
|
|
2012-11-30 17:42:40 +07:00
|
|
|
|
|
|
|
#define __wait_event_lock_irq(wq, condition, lock, cmd) \
|
2013-10-02 16:22:33 +07:00
|
|
|
(void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0, \
|
|
|
|
spin_unlock_irq(&lock); \
|
|
|
|
cmd; \
|
|
|
|
schedule(); \
|
|
|
|
spin_lock_irq(&lock))
|
2012-11-30 17:42:40 +07:00
|
|
|
|
|
|
|
/**
|
|
|
|
* wait_event_lock_irq_cmd - sleep until a condition gets true. The
|
|
|
|
* condition is checked under the lock. This
|
|
|
|
* is expected to be called with the lock
|
|
|
|
* taken.
|
|
|
|
* @wq: the waitqueue to wait on
|
|
|
|
* @condition: a C expression for the event to wait for
|
|
|
|
* @lock: a locked spinlock_t, which will be released before cmd
|
|
|
|
* and schedule() and reacquired afterwards.
|
|
|
|
* @cmd: a command which is invoked outside the critical section before
|
|
|
|
* sleep
|
|
|
|
*
|
|
|
|
* The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
|
|
|
|
* @condition evaluates to true. The @condition is checked each time
|
|
|
|
* the waitqueue @wq is woken up.
|
|
|
|
*
|
|
|
|
* wake_up() has to be called after changing any variable that could
|
|
|
|
* change the result of the wait condition.
|
|
|
|
*
|
|
|
|
* This is supposed to be called while holding the lock. The lock is
|
|
|
|
* dropped before invoking the cmd and going to sleep and is reacquired
|
|
|
|
* afterwards.
|
|
|
|
*/
|
|
|
|
#define wait_event_lock_irq_cmd(wq, condition, lock, cmd) \
|
|
|
|
do { \
|
|
|
|
if (condition) \
|
|
|
|
break; \
|
|
|
|
__wait_event_lock_irq(wq, condition, lock, cmd); \
|
|
|
|
} while (0)
|
|
|
|
|
|
|
|
/**
|
|
|
|
* wait_event_lock_irq - sleep until a condition gets true. The
|
|
|
|
* condition is checked under the lock. This
|
|
|
|
* is expected to be called with the lock
|
|
|
|
* taken.
|
|
|
|
* @wq: the waitqueue to wait on
|
|
|
|
* @condition: a C expression for the event to wait for
|
|
|
|
* @lock: a locked spinlock_t, which will be released before schedule()
|
|
|
|
* and reacquired afterwards.
|
|
|
|
*
|
|
|
|
* The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
|
|
|
|
* @condition evaluates to true. The @condition is checked each time
|
|
|
|
* the waitqueue @wq is woken up.
|
|
|
|
*
|
|
|
|
* wake_up() has to be called after changing any variable that could
|
|
|
|
* change the result of the wait condition.
|
|
|
|
*
|
|
|
|
* This is supposed to be called while holding the lock. The lock is
|
|
|
|
* dropped before going to sleep and is reacquired afterwards.
|
|
|
|
*/
|
|
|
|
#define wait_event_lock_irq(wq, condition, lock) \
|
|
|
|
do { \
|
|
|
|
if (condition) \
|
|
|
|
break; \
|
|
|
|
__wait_event_lock_irq(wq, condition, lock, ); \
|
|
|
|
} while (0)
|
|
|
|
|
|
|
|
|
2013-10-02 16:22:33 +07:00
|
|
|
#define __wait_event_interruptible_lock_irq(wq, condition, lock, cmd) \
|
2013-10-04 15:24:49 +07:00
|
|
|
___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0, \
|
2013-10-02 16:22:33 +07:00
|
|
|
spin_unlock_irq(&lock); \
|
|
|
|
cmd; \
|
|
|
|
schedule(); \
|
2013-10-02 16:22:28 +07:00
|
|
|
spin_lock_irq(&lock))
|
2012-11-30 17:42:40 +07:00
|
|
|
|
|
|
|
/**
|
|
|
|
* wait_event_interruptible_lock_irq_cmd - sleep until a condition gets true.
|
|
|
|
* The condition is checked under the lock. This is expected to
|
|
|
|
* be called with the lock taken.
|
|
|
|
* @wq: the waitqueue to wait on
|
|
|
|
* @condition: a C expression for the event to wait for
|
|
|
|
* @lock: a locked spinlock_t, which will be released before cmd and
|
|
|
|
* schedule() and reacquired afterwards.
|
|
|
|
* @cmd: a command which is invoked outside the critical section before
|
|
|
|
* sleep
|
|
|
|
*
|
|
|
|
* The process is put to sleep (TASK_INTERRUPTIBLE) until the
|
|
|
|
* @condition evaluates to true or a signal is received. The @condition is
|
|
|
|
* checked each time the waitqueue @wq is woken up.
|
|
|
|
*
|
|
|
|
* wake_up() has to be called after changing any variable that could
|
|
|
|
* change the result of the wait condition.
|
|
|
|
*
|
|
|
|
* This is supposed to be called while holding the lock. The lock is
|
|
|
|
* dropped before invoking the cmd and going to sleep and is reacquired
|
|
|
|
* afterwards.
|
|
|
|
*
|
|
|
|
* The macro will return -ERESTARTSYS if it was interrupted by a signal
|
|
|
|
* and 0 if @condition evaluated to true.
|
|
|
|
*/
|
|
|
|
#define wait_event_interruptible_lock_irq_cmd(wq, condition, lock, cmd) \
|
|
|
|
({ \
|
|
|
|
int __ret = 0; \
|
|
|
|
if (!(condition)) \
|
2013-10-04 15:24:49 +07:00
|
|
|
__ret = __wait_event_interruptible_lock_irq(wq, \
|
2013-10-02 16:22:33 +07:00
|
|
|
condition, lock, cmd); \
|
2012-11-30 17:42:40 +07:00
|
|
|
__ret; \
|
|
|
|
})
|
|
|
|
|
|
|
|
/**
|
|
|
|
* wait_event_interruptible_lock_irq - sleep until a condition gets true.
|
|
|
|
* The condition is checked under the lock. This is expected
|
|
|
|
* to be called with the lock taken.
|
|
|
|
* @wq: the waitqueue to wait on
|
|
|
|
* @condition: a C expression for the event to wait for
|
|
|
|
* @lock: a locked spinlock_t, which will be released before schedule()
|
|
|
|
* and reacquired afterwards.
|
|
|
|
*
|
|
|
|
* The process is put to sleep (TASK_INTERRUPTIBLE) until the
|
|
|
|
* @condition evaluates to true or a signal is received. The @condition is
|
|
|
|
* checked each time the waitqueue @wq is woken up.
|
|
|
|
*
|
|
|
|
* wake_up() has to be called after changing any variable that could
|
|
|
|
* change the result of the wait condition.
|
|
|
|
*
|
|
|
|
* This is supposed to be called while holding the lock. The lock is
|
|
|
|
* dropped before going to sleep and is reacquired afterwards.
|
|
|
|
*
|
|
|
|
* The macro will return -ERESTARTSYS if it was interrupted by a signal
|
|
|
|
* and 0 if @condition evaluated to true.
|
|
|
|
*/
|
|
|
|
#define wait_event_interruptible_lock_irq(wq, condition, lock) \
|
|
|
|
({ \
|
|
|
|
int __ret = 0; \
|
|
|
|
if (!(condition)) \
|
2013-10-02 16:22:33 +07:00
|
|
|
__ret = __wait_event_interruptible_lock_irq(wq, \
|
2013-10-23 18:40:55 +07:00
|
|
|
condition, lock,); \
|
2012-11-30 17:42:40 +07:00
|
|
|
__ret; \
|
|
|
|
})
|
|
|
|
|
2013-10-04 15:24:49 +07:00
|
|
|
/*
 * Core of wait_event_interruptible_lock_irq_timeout(): expands to a
 * ___wait_event() loop in TASK_INTERRUPTIBLE, non-exclusive, seeded with
 * @timeout.  On every iteration @lock is dropped with spin_unlock_irq()
 * before schedule_timeout() and re-taken with spin_lock_irq() afterwards;
 * the remaining time is stored back into the __ret variable declared by
 * ___wait_event().
 *
 * The expansion carries no trailing semicolon so that it can be used as
 * an expression; the caller supplies the statement terminator.
 */
#define __wait_event_interruptible_lock_irq_timeout(wq, condition,	\
						    lock, timeout)	\
	___wait_event(wq, ___wait_cond_timeout(condition),		\
		      TASK_INTERRUPTIBLE, 0, timeout,			\
		      spin_unlock_irq(&lock);				\
		      __ret = schedule_timeout(__ret);			\
		      spin_lock_irq(&lock))
|
2013-08-22 22:45:36 +07:00
|
|
|
|
|
|
|
/**
|
2013-10-04 15:24:49 +07:00
|
|
|
* wait_event_interruptible_lock_irq_timeout - sleep until a condition gets
|
|
|
|
* true or a timeout elapses. The condition is checked under
|
|
|
|
* the lock. This is expected to be called with the lock taken.
|
2013-08-22 22:45:36 +07:00
|
|
|
* @wq: the waitqueue to wait on
|
|
|
|
* @condition: a C expression for the event to wait for
|
|
|
|
* @lock: a locked spinlock_t, which will be released before schedule()
|
|
|
|
* and reacquired afterwards.
|
|
|
|
* @timeout: timeout, in jiffies
|
|
|
|
*
|
|
|
|
* The process is put to sleep (TASK_INTERRUPTIBLE) until the
|
|
|
|
* @condition evaluates to true or a signal is received. The @condition is
|
|
|
|
* checked each time the waitqueue @wq is woken up.
|
|
|
|
*
|
|
|
|
* wake_up() has to be called after changing any variable that could
|
|
|
|
* change the result of the wait condition.
|
|
|
|
*
|
|
|
|
* This is supposed to be called while holding the lock. The lock is
|
|
|
|
* dropped before going to sleep and is reacquired afterwards.
|
|
|
|
*
|
|
|
|
* The function returns 0 if the @timeout elapsed, -ERESTARTSYS if it
|
|
|
|
* was interrupted by a signal, and the remaining jiffies otherwise
|
|
|
|
* if the condition evaluated to true before the timeout elapsed.
|
|
|
|
*/
|
|
|
|
#define wait_event_interruptible_lock_irq_timeout(wq, condition, lock, \
|
|
|
|
timeout) \
|
|
|
|
({ \
|
2013-10-02 16:22:33 +07:00
|
|
|
long __ret = timeout; \
|
2013-10-08 01:31:06 +07:00
|
|
|
if (!___wait_cond_timeout(condition)) \
|
2013-10-02 16:22:33 +07:00
|
|
|
__ret = __wait_event_interruptible_lock_irq_timeout( \
|
|
|
|
wq, condition, lock, timeout); \
|
2013-08-22 22:45:36 +07:00
|
|
|
__ret; \
|
|
|
|
})
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/*
|
|
|
|
* Waitqueues which are removed from the waitqueue_head at wakeup time
|
|
|
|
*/
|
2008-02-14 06:03:15 +07:00
|
|
|
void prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state);
|
|
|
|
void prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state);
|
2013-10-07 23:18:24 +07:00
|
|
|
long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state);
|
2008-02-14 06:03:15 +07:00
|
|
|
void finish_wait(wait_queue_head_t *q, wait_queue_t *wait);
|
2013-10-04 15:24:49 +07:00
|
|
|
void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait, unsigned int mode, void *key);
|
2005-04-17 05:20:36 +07:00
|
|
|
int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
|
|
|
|
int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
|
|
|
|
|
2009-04-28 16:24:21 +07:00
|
|
|
#define DEFINE_WAIT_FUNC(name, function) \
|
2005-04-17 05:20:36 +07:00
|
|
|
wait_queue_t name = { \
|
2005-06-23 14:10:27 +07:00
|
|
|
.private = current, \
|
2009-04-28 16:24:21 +07:00
|
|
|
.func = function, \
|
2005-05-25 06:31:42 +07:00
|
|
|
.task_list = LIST_HEAD_INIT((name).task_list), \
|
2005-04-17 05:20:36 +07:00
|
|
|
}
|
|
|
|
|
2009-04-28 16:24:21 +07:00
|
|
|
/*
 * DEFINE_WAIT - declare a wait_queue_t called @name via DEFINE_WAIT_FUNC(),
 * using autoremove_wake_function() as its wake callback.
 */
#define DEFINE_WAIT(name) DEFINE_WAIT_FUNC(name, autoremove_wake_function)
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
#define DEFINE_WAIT_BIT(name, word, bit) \
|
|
|
|
struct wait_bit_queue name = { \
|
|
|
|
.key = __WAIT_BIT_KEY_INITIALIZER(word, bit), \
|
|
|
|
.wait = { \
|
2005-06-23 14:10:27 +07:00
|
|
|
.private = current, \
|
2005-04-17 05:20:36 +07:00
|
|
|
.func = wake_bit_function, \
|
|
|
|
.task_list = \
|
|
|
|
LIST_HEAD_INIT((name).wait.task_list), \
|
|
|
|
}, \
|
|
|
|
}
|
|
|
|
|
|
|
|
#define init_wait(wait) \
|
|
|
|
do { \
|
2005-06-23 14:10:27 +07:00
|
|
|
(wait)->private = current; \
|
2005-04-17 05:20:36 +07:00
|
|
|
(wait)->func = autoremove_wake_function; \
|
|
|
|
INIT_LIST_HEAD(&(wait)->task_list); \
|
2010-10-05 15:47:57 +07:00
|
|
|
(wait)->flags = 0; \
|
2005-04-17 05:20:36 +07:00
|
|
|
} while (0)
|
|
|
|
|
sched: Remove proliferation of wait_on_bit() action functions
The current "wait_on_bit" interface requires an 'action'
function to be provided which does the actual waiting.
There are over 20 such functions, many of them identical.
Most cases can be satisfied by one of just two functions, one
which uses io_schedule() and one which just uses schedule().
So:
Rename wait_on_bit and wait_on_bit_lock to
wait_on_bit_action and wait_on_bit_lock_action
to make it explicit that they need an action function.
Introduce new wait_on_bit{,_lock} and wait_on_bit{,_lock}_io
which are *not* given an action function but implicitly use
a standard one.
The decision to error-out if a signal is pending is now made
based on the 'mode' argument rather than being encoded in the action
function.
All instances of the old wait_on_bit and wait_on_bit_lock which
can use the new version have been changed accordingly and their
action functions have been discarded.
wait_on_bit{_lock} does not return any specific error code in the
event of a signal so the caller must check for non-zero and
interpolate their own error code as appropriate.
The wait_on_bit() call in __fscache_wait_on_invalidate() was
ambiguous as it specified TASK_UNINTERRUPTIBLE but used
fscache_wait_bit_interruptible as an action function.
David Howells confirms this should be uniformly
"uninterruptible"
The main remaining user of wait_on_bit{,_lock}_action is NFS
which needs to use a freezer-aware schedule() call.
A comment in fs/gfs2/glock.c notes that having multiple 'action'
functions is useful as they display differently in the 'wchan'
field of 'ps'. (and /proc/$PID/wchan).
As the new bit_wait{,_io} functions are tagged "__sched", they
will not show up at all, but something higher in the stack. So
the distinction will still be visible, only with different
function names (gds2_glock_wait versus gfs2_glock_dq_wait in the
gfs2/glock.c case).
Since first version of this patch (against 3.15) two new action
functions appeared, on in NFS and one in CIFS. CIFS also now
uses an action function that makes the same freezer aware
schedule call as NFS.
Signed-off-by: NeilBrown <neilb@suse.de>
Acked-by: David Howells <dhowells@redhat.com> (fscache, keys)
Acked-by: Steven Whitehouse <swhiteho@redhat.com> (gfs2)
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Steve French <sfrench@samba.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/20140707051603.28027.72349.stgit@notabene.brown
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2014-07-07 12:16:04 +07:00
|
|
|
|
2014-07-07 12:16:04 +07:00
|
|
|
extern int bit_wait(struct wait_bit_key *);
|
|
|
|
extern int bit_wait_io(struct wait_bit_key *);
|
sched: Remove proliferation of wait_on_bit() action functions
The current "wait_on_bit" interface requires an 'action'
function to be provided which does the actual waiting.
There are over 20 such functions, many of them identical.
Most cases can be satisfied by one of just two functions, one
which uses io_schedule() and one which just uses schedule().
So:
Rename wait_on_bit and wait_on_bit_lock to
wait_on_bit_action and wait_on_bit_lock_action
to make it explicit that they need an action function.
Introduce new wait_on_bit{,_lock} and wait_on_bit{,_lock}_io
which are *not* given an action function but implicitly use
a standard one.
The decision to error-out if a signal is pending is now made
based on the 'mode' argument rather than being encoded in the action
function.
All instances of the old wait_on_bit and wait_on_bit_lock which
can use the new version have been changed accordingly and their
action functions have been discarded.
wait_on_bit{_lock} does not return any specific error code in the
event of a signal so the caller must check for non-zero and
interpolate their own error code as appropriate.
The wait_on_bit() call in __fscache_wait_on_invalidate() was
ambiguous as it specified TASK_UNINTERRUPTIBLE but used
fscache_wait_bit_interruptible as an action function.
David Howells confirms this should be uniformly
"uninterruptible"
The main remaining user of wait_on_bit{,_lock}_action is NFS
which needs to use a freezer-aware schedule() call.
A comment in fs/gfs2/glock.c notes that having multiple 'action'
functions is useful as they display differently in the 'wchan'
field of 'ps'. (and /proc/$PID/wchan).
As the new bit_wait{,_io} functions are tagged "__sched", they
will not show up at all, but something higher in the stack. So
the distinction will still be visible, only with different
function names (gfs2_glock_wait versus gfs2_glock_dq_wait in the
gfs2/glock.c case).
Since the first version of this patch (against 3.15) two new action
functions appeared, one in NFS and one in CIFS. CIFS also now
uses an action function that makes the same freezer aware
schedule call as NFS.
Signed-off-by: NeilBrown <neilb@suse.de>
Acked-by: David Howells <dhowells@redhat.com> (fscache, keys)
Acked-by: Steven Whitehouse <swhiteho@redhat.com> (gfs2)
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Steve French <sfrench@samba.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/20140707051603.28027.72349.stgit@notabene.brown
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2014-07-07 12:16:04 +07:00
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/**
|
|
|
|
* wait_on_bit - wait for a bit to be cleared
|
|
|
|
* @word: the word being waited on, a kernel virtual address
|
|
|
|
* @bit: the bit of the word being waited on
|
|
|
|
* @mode: the task state to sleep in
|
|
|
|
*
|
|
|
|
* There is a standard hashed waitqueue table for generic use. This
|
|
|
|
* is the part of the hashtable's accessor API that waits on a bit.
|
|
|
|
* For instance, if one were to have waiters on a bitflag, one would
|
|
|
|
* call wait_on_bit() in threads waiting for the bit to clear.
|
|
|
|
* One uses wait_on_bit() where one is waiting for the bit to clear,
|
|
|
|
* but has no intention of setting it.
|
sched: Remove proliferation of wait_on_bit() action functions
The current "wait_on_bit" interface requires an 'action'
function to be provided which does the actual waiting.
There are over 20 such functions, many of them identical.
Most cases can be satisfied by one of just two functions, one
which uses io_schedule() and one which just uses schedule().
So:
Rename wait_on_bit and wait_on_bit_lock to
wait_on_bit_action and wait_on_bit_lock_action
to make it explicit that they need an action function.
Introduce new wait_on_bit{,_lock} and wait_on_bit{,_lock}_io
which are *not* given an action function but implicitly use
a standard one.
The decision to error-out if a signal is pending is now made
based on the 'mode' argument rather than being encoded in the action
function.
All instances of the old wait_on_bit and wait_on_bit_lock which
can use the new version have been changed accordingly and their
action functions have been discarded.
wait_on_bit{_lock} does not return any specific error code in the
event of a signal so the caller must check for non-zero and
interpolate their own error code as appropriate.
The wait_on_bit() call in __fscache_wait_on_invalidate() was
ambiguous as it specified TASK_UNINTERRUPTIBLE but used
fscache_wait_bit_interruptible as an action function.
David Howells confirms this should be uniformly
"uninterruptible"
The main remaining user of wait_on_bit{,_lock}_action is NFS
which needs to use a freezer-aware schedule() call.
A comment in fs/gfs2/glock.c notes that having multiple 'action'
functions is useful as they display differently in the 'wchan'
field of 'ps'. (and /proc/$PID/wchan).
As the new bit_wait{,_io} functions are tagged "__sched", they
will not show up at all, but something higher in the stack. So
the distinction will still be visible, only with different
function names (gfs2_glock_wait versus gfs2_glock_dq_wait in the
gfs2/glock.c case).
Since the first version of this patch (against 3.15) two new action
functions appeared, one in NFS and one in CIFS. CIFS also now
uses an action function that makes the same freezer aware
schedule call as NFS.
Signed-off-by: NeilBrown <neilb@suse.de>
Acked-by: David Howells <dhowells@redhat.com> (fscache, keys)
Acked-by: Steven Whitehouse <swhiteho@redhat.com> (gfs2)
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Steve French <sfrench@samba.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/20140707051603.28027.72349.stgit@notabene.brown
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2014-07-07 12:16:04 +07:00
|
|
|
* Returned value will be zero if the bit was cleared, or non-zero
|
|
|
|
* if the process received a signal and the mode permitted wakeup
|
|
|
|
* on that signal.
|
|
|
|
*/
|
|
|
|
static inline int
|
|
|
|
wait_on_bit(void *word, int bit, unsigned mode)
|
|
|
|
{
|
|
|
|
if (!test_bit(bit, word))
|
|
|
|
return 0;
|
|
|
|
return out_of_line_wait_on_bit(word, bit,
|
|
|
|
bit_wait,
|
|
|
|
mode);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* wait_on_bit_io - wait for a bit to be cleared
|
|
|
|
* @word: the word being waited on, a kernel virtual address
|
|
|
|
* @bit: the bit of the word being waited on
|
|
|
|
* @mode: the task state to sleep in
|
|
|
|
*
|
|
|
|
* Use the standard hashed waitqueue table to wait for a bit
|
|
|
|
* to be cleared. This is similar to wait_on_bit(), but calls
|
|
|
|
* io_schedule() instead of schedule() for the actual waiting.
|
|
|
|
*
|
|
|
|
* Returned value will be zero if the bit was cleared, or non-zero
|
|
|
|
* if the process received a signal and the mode permitted wakeup
|
|
|
|
* on that signal.
|
|
|
|
*/
|
|
|
|
static inline int
|
|
|
|
wait_on_bit_io(void *word, int bit, unsigned mode)
|
|
|
|
{
|
|
|
|
if (!test_bit(bit, word))
|
|
|
|
return 0;
|
|
|
|
return out_of_line_wait_on_bit(word, bit,
|
|
|
|
bit_wait_io,
|
|
|
|
mode);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* wait_on_bit_action - wait for a bit to be cleared
|
|
|
|
* @word: the word being waited on, a kernel virtual address
|
|
|
|
* @bit: the bit of the word being waited on
|
|
|
|
* @action: the function used to sleep, which may take special actions
|
|
|
|
* @mode: the task state to sleep in
|
|
|
|
*
|
|
|
|
* Use the standard hashed waitqueue table to wait for a bit
|
|
|
|
* to be cleared, and allow the waiting action to be specified.
|
|
|
|
* This is like wait_on_bit() but allows fine control of how the waiting
|
|
|
|
* is done.
|
|
|
|
*
|
|
|
|
* Returned value will be zero if the bit was cleared, or non-zero
|
|
|
|
* if the process received a signal and the mode permitted wakeup
|
|
|
|
* on that signal.
|
2005-04-17 05:20:36 +07:00
|
|
|
*/
|
2013-10-04 15:24:49 +07:00
|
|
|
static inline int
|
2014-07-07 12:16:04 +07:00
|
|
|
wait_on_bit_action(void *word, int bit, wait_bit_action_f *action, unsigned mode)
|
2005-04-17 05:20:36 +07:00
|
|
|
{
|
|
|
|
if (!test_bit(bit, word))
|
|
|
|
return 0;
|
|
|
|
return out_of_line_wait_on_bit(word, bit, action, mode);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* wait_on_bit_lock - wait for a bit to be cleared, when wanting to set it
|
|
|
|
* @word: the word being waited on, a kernel virtual address
|
|
|
|
* @bit: the bit of the word being waited on
|
|
|
|
* @mode: the task state to sleep in
|
|
|
|
*
|
|
|
|
* There is a standard hashed waitqueue table for generic use. This
|
|
|
|
* is the part of the hashtable's accessor API that waits on a bit
|
|
|
|
* when one intends to set it, for instance, trying to lock bitflags.
|
|
|
|
* For instance, if one were to have waiters trying to set bitflag
|
|
|
|
* and waiting for it to clear before setting it, one would call
|
|
|
|
* wait_on_bit_lock() in threads waiting to be able to set the bit.
|
|
|
|
* One uses wait_on_bit_lock() where one is waiting for the bit to
|
|
|
|
* clear with the intention of setting it, and when done, clearing it.
|
sched: Remove proliferation of wait_on_bit() action functions
The current "wait_on_bit" interface requires an 'action'
function to be provided which does the actual waiting.
There are over 20 such functions, many of them identical.
Most cases can be satisfied by one of just two functions, one
which uses io_schedule() and one which just uses schedule().
So:
Rename wait_on_bit and wait_on_bit_lock to
wait_on_bit_action and wait_on_bit_lock_action
to make it explicit that they need an action function.
Introduce new wait_on_bit{,_lock} and wait_on_bit{,_lock}_io
which are *not* given an action function but implicitly use
a standard one.
The decision to error-out if a signal is pending is now made
based on the 'mode' argument rather than being encoded in the action
function.
All instances of the old wait_on_bit and wait_on_bit_lock which
can use the new version have been changed accordingly and their
action functions have been discarded.
wait_on_bit{_lock} does not return any specific error code in the
event of a signal so the caller must check for non-zero and
interpolate their own error code as appropriate.
The wait_on_bit() call in __fscache_wait_on_invalidate() was
ambiguous as it specified TASK_UNINTERRUPTIBLE but used
fscache_wait_bit_interruptible as an action function.
David Howells confirms this should be uniformly
"uninterruptible"
The main remaining user of wait_on_bit{,_lock}_action is NFS
which needs to use a freezer-aware schedule() call.
A comment in fs/gfs2/glock.c notes that having multiple 'action'
functions is useful as they display differently in the 'wchan'
field of 'ps'. (and /proc/$PID/wchan).
As the new bit_wait{,_io} functions are tagged "__sched", they
will not show up at all, but something higher in the stack. So
the distinction will still be visible, only with different
function names (gfs2_glock_wait versus gfs2_glock_dq_wait in the
gfs2/glock.c case).
Since the first version of this patch (against 3.15) two new action
functions appeared, one in NFS and one in CIFS. CIFS also now
uses an action function that makes the same freezer aware
schedule call as NFS.
Signed-off-by: NeilBrown <neilb@suse.de>
Acked-by: David Howells <dhowells@redhat.com> (fscache, keys)
Acked-by: Steven Whitehouse <swhiteho@redhat.com> (gfs2)
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Steve French <sfrench@samba.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/20140707051603.28027.72349.stgit@notabene.brown
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2014-07-07 12:16:04 +07:00
|
|
|
*
|
|
|
|
* Returns zero if the bit was (eventually) found to be clear and was
|
|
|
|
* set. Returns non-zero if a signal was delivered to the process and
|
|
|
|
* the @mode allows that signal to wake the process.
|
|
|
|
*/
|
|
|
|
static inline int
|
|
|
|
wait_on_bit_lock(void *word, int bit, unsigned mode)
|
|
|
|
{
|
|
|
|
if (!test_and_set_bit(bit, word))
|
|
|
|
return 0;
|
|
|
|
return out_of_line_wait_on_bit_lock(word, bit, bit_wait, mode);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* wait_on_bit_lock_io - wait for a bit to be cleared, when wanting to set it
|
|
|
|
* @word: the word being waited on, a kernel virtual address
|
|
|
|
* @bit: the bit of the word being waited on
|
|
|
|
* @mode: the task state to sleep in
|
|
|
|
*
|
|
|
|
* Use the standard hashed waitqueue table to wait for a bit
|
|
|
|
* to be cleared and then to atomically set it. This is similar
|
|
|
|
* to wait_on_bit_lock(), but calls io_schedule() instead of schedule()
|
|
|
|
* for the actual waiting.
|
|
|
|
*
|
|
|
|
* Returns zero if the bit was (eventually) found to be clear and was
|
|
|
|
* set. Returns non-zero if a signal was delivered to the process and
|
|
|
|
* the @mode allows that signal to wake the process.
|
|
|
|
*/
|
|
|
|
static inline int
|
|
|
|
wait_on_bit_lock_io(void *word, int bit, unsigned mode)
|
|
|
|
{
|
|
|
|
if (!test_and_set_bit(bit, word))
|
|
|
|
return 0;
|
|
|
|
return out_of_line_wait_on_bit_lock(word, bit, bit_wait_io, mode);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* wait_on_bit_lock_action - wait for a bit to be cleared, when wanting to set it
|
|
|
|
* @word: the word being waited on, a kernel virtual address
|
|
|
|
* @bit: the bit of the word being waited on
|
|
|
|
* @action: the function used to sleep, which may take special actions
|
|
|
|
* @mode: the task state to sleep in
|
|
|
|
*
|
|
|
|
* Use the standard hashed waitqueue table to wait for a bit
|
|
|
|
* to be cleared and then to set it, and allow the waiting action
|
|
|
|
* to be specified.
|
|
|
|
* This is like wait_on_bit_lock() but allows fine control of how the waiting
|
|
|
|
* is done.
|
|
|
|
*
|
|
|
|
* Returns zero if the bit was (eventually) found to be clear and was
|
|
|
|
* set. Returns non-zero if a signal was delivered to the process and
|
|
|
|
* the @mode allows that signal to wake the process.
|
2005-04-17 05:20:36 +07:00
|
|
|
*/
|
2013-10-04 15:24:49 +07:00
|
|
|
static inline int
|
2014-07-07 12:16:04 +07:00
|
|
|
wait_on_bit_lock_action(void *word, int bit, wait_bit_action_f *action, unsigned mode)
|
2005-04-17 05:20:36 +07:00
|
|
|
{
|
|
|
|
if (!test_and_set_bit(bit, word))
|
|
|
|
return 0;
|
|
|
|
return out_of_line_wait_on_bit_lock(word, bit, action, mode);
|
|
|
|
}
|
Add wait_on_atomic_t() and wake_up_atomic_t()
Add wait_on_atomic_t() and wake_up_atomic_t() to indicate became-zero events on
atomic_t types. This uses the bit-wake waitqueue table. The key is set to a
value outside of the number of bits in a long so that wait_on_bit() won't be
woken up accidentally.
What I'm using this for is: in a following patch I add a counter to struct
fscache_cookie to count the number of outstanding operations that need access
to netfs data. The way this works is:
(1) When a cookie is allocated, the counter is initialised to 1.
(2) When an operation wants to access netfs data, it calls atomic_inc_unless()
to increment the counter before it does so. If it was 0, then the counter
isn't incremented, the operation isn't permitted to access the netfs data
(which might by this point no longer exist) and the operation aborts in
some appropriate manner.
(3) When an operation finishes with the netfs data, it decrements the counter
and if it reaches 0, calls wake_up_atomic_t() on it - the assumption being
that it was the last blocker.
(4) When a cookie is released, the counter is decremented and the releaser
uses wait_on_atomic_t() to wait for the counter to become 0 - which should
indicate no one is using the netfs data any longer. The netfs data can
then be destroyed.
There are some alternatives that I have thought of and that have been suggested
by Tejun Heo:
(A) Using wait_on_bit() to wait on a bit in the counter. This doesn't work
because if that bit happens to be 0 then the wait won't happen - even if
the counter is non-zero.
(B) Using wait_on_bit() to wait on a flag elsewhere which is cleared when the
counter reaches 0. Such a flag would be redundant and would add
complexity.
(C) Adding a waitqueue to fscache_cookie - this would expand that struct by
several words for an event that happens just once in each cookie's
lifetime. Further, cookies are generally per-file so there are likely to
be a lot of them.
(D) Similar to (C), but add a pointer to a waitqueue in the cookie instead of
a waitqueue. This would add a single word per cookie and so would be less
of an expansion - but still an expansion.
(E) Adding a static waitqueue to the fscache module. Generally this would be
fine, but under certain circumstances many cookies will all get added at
the same time (eg. NFS umount, cache withdrawal) thereby presenting
scaling issues. Note that the wait may be significant as disk I/O may be
in progress.
So, I think reusing the wait_on_bit() waitqueue set is reasonable. I don't
make much use of the waitqueue I need on a per-cookie basis, but sometimes I
have a huge flood of the cookies to deal with.
I also don't want to add a whole new set of global waitqueue tables
specifically for the dec-to-0 event if I can reuse the bit tables.
Signed-off-by: David Howells <dhowells@redhat.com>
Tested-By: Milosz Tanski <milosz@adfin.com>
Acked-by: Jeff Layton <jlayton@redhat.com>
2013-05-11 01:50:26 +07:00
|
|
|
|
|
|
|
/**
|
|
|
|
* wait_on_atomic_t - Wait for an atomic_t to become 0
|
|
|
|
* @val: The atomic value being waited on, a kernel virtual address
|
|
|
|
* @action: the function used to sleep, which may take special actions
|
|
|
|
* @mode: the task state to sleep in
|
|
|
|
*
|
|
|
|
* Wait for an atomic_t to become 0. We abuse the bit-wait waitqueue table for
|
|
|
|
* the purpose of getting a waitqueue, but we set the key to a bit number
|
|
|
|
* outside of the target 'word'.
|
|
|
|
*/
|
|
|
|
static inline
|
|
|
|
int wait_on_atomic_t(atomic_t *val, int (*action)(atomic_t *), unsigned mode)
|
|
|
|
{
|
|
|
|
if (atomic_read(val) == 0)
|
|
|
|
return 0;
|
|
|
|
return out_of_line_wait_on_atomic_t(val, action, mode);
|
|
|
|
}
|
2013-10-04 15:24:49 +07:00
|
|
|
|
|
|
|
#endif /* _LINUX_WAIT_H */
|