2005-04-17 05:20:36 +07:00
|
|
|
#ifndef _LINUX_WAIT_H
|
|
|
|
#define _LINUX_WAIT_H
|
|
|
|
|
|
|
|
|
|
|
|
#include <linux/list.h>
|
|
|
|
#include <linux/stddef.h>
|
|
|
|
#include <linux/spinlock.h>
|
|
|
|
#include <asm/current.h>
|
2012-10-13 16:46:48 +07:00
|
|
|
#include <uapi/linux/wait.h>
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/* Short name for a wait-queue entry; the struct body appears later in this header. */
typedef struct __wait_queue wait_queue_t;
|
2009-09-15 00:55:44 +07:00
|
|
|
/*
 * Signature of the callback stored in a wait-queue entry's ->func member.
 * It receives the entry itself plus a mode, flags and an opaque key.
 */
typedef int (*wait_queue_func_t)(wait_queue_t *wait, unsigned mode, int flags, void *key);
|
|
|
|
/*
 * Callback installed by __WAITQUEUE_INITIALIZER() and init_waitqueue_entry();
 * only declared here, the definition lives outside this header.
 */
int default_wake_function(wait_queue_t *wait, unsigned mode, int flags, void *key);
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
struct __wait_queue {
|
|
|
|
unsigned int flags;
|
|
|
|
#define WQ_FLAG_EXCLUSIVE 0x01
|
2005-06-23 14:10:27 +07:00
|
|
|
void *private;
|
2005-04-17 05:20:36 +07:00
|
|
|
wait_queue_func_t func;
|
|
|
|
struct list_head task_list;
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * bit_nr value used for atomic_t waiters.  It is negative, i.e. outside the
 * range of real bit numbers in a long, so a wake-up aimed at an ordinary bit
 * does not match it.  Parenthesised so the macro is safe in any expression.
 */
#define WAIT_ATOMIC_T_BIT_NR	(-1)

/* Key describing what a bit-waiter sleeps on: an address plus a bit number. */
struct wait_bit_key {
	void	*flags;		/* address being waited on */
	int	bit_nr;		/* bit number, or WAIT_ATOMIC_T_BIT_NR */
};
|
|
|
|
|
|
|
|
struct wait_bit_queue {
|
|
|
|
struct wait_bit_key key;
|
|
|
|
wait_queue_t wait;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct __wait_queue_head {
|
|
|
|
spinlock_t lock;
|
|
|
|
struct list_head task_list;
|
|
|
|
};
|
|
|
|
typedef struct __wait_queue_head wait_queue_head_t;
|
|
|
|
|
2005-11-07 15:59:43 +07:00
|
|
|
/* Forward declaration; the part of this header shown here only uses pointers to it. */
struct task_struct;
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Macros for declaration and initialisation of the datatypes
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
 * Static initialiser for a wait_queue_t: stores @tsk in ->private, installs
 * default_wake_function() and sets both task_list pointers to NULL.  ->flags
 * is not named and is therefore zero-initialised.  @name is not referenced by
 * the expansion.
 */
#define __WAITQUEUE_INITIALIZER(name, tsk) {				\
	.private	= tsk,						\
	.func		= default_wake_function,			\
	.task_list	= { NULL, NULL },				\
}
|
|
|
|
|
|
|
|
/* Define a wait_queue_t variable @name initialised for task @tsk. */
#define DECLARE_WAITQUEUE(name, tsk)	wait_queue_t name = __WAITQUEUE_INITIALIZER(name, tsk)
|
|
|
|
|
|
|
|
/*
 * Static initialiser for a wait_queue_head_t called @name: an unlocked
 * spinlock and a task_list whose two pointers refer back to itself (an empty
 * list).
 * NOTE(review): "name.lock" is deliberately left spelled exactly as before;
 * __SPIN_LOCK_UNLOCKED() may stringify its argument - confirm before
 * parenthesising.
 */
#define __WAIT_QUEUE_HEAD_INITIALIZER(name) {				\
	.lock		= __SPIN_LOCK_UNLOCKED(name.lock),		\
	.task_list	= { &(name).task_list, &(name).task_list },	\
}
|
|
|
|
|
|
|
|
/* Define a statically initialised, empty wait-queue head called @name. */
#define DECLARE_WAIT_QUEUE_HEAD(name)	wait_queue_head_t name = __WAIT_QUEUE_HEAD_INITIALIZER(name)
|
|
|
|
|
|
|
|
/* Initialiser for a struct wait_bit_key: @word goes to ->flags, @bit to ->bit_nr. */
#define __WAIT_BIT_KEY_INITIALIZER(word, bit) {				\
	.flags	= word,							\
	.bit_nr	= bit,							\
}
|
|
|
|
|
Add wait_on_atomic_t() and wake_up_atomic_t()
Add wait_on_atomic_t() and wake_up_atomic_t() to indicate became-zero events on
atomic_t types. This uses the bit-wake waitqueue table. The key is set to a
value outside of the number of bits in a long so that wait_on_bit() won't be
woken up accidentally.
What I'm using this for is: in a following patch I add a counter to struct
fscache_cookie to count the number of outstanding operations that need access
to netfs data. The way this works is:
(1) When a cookie is allocated, the counter is initialised to 1.
(2) When an operation wants to access netfs data, it calls atomic_inc_unless()
to increment the counter before it does so. If it was 0, then the counter
isn't incremented, the operation isn't permitted to access the netfs data
(which might by this point no longer exist) and the operation aborts in
some appropriate manner.
(3) When an operation finishes with the netfs data, it decrements the counter
and if it reaches 0, calls wake_up_atomic_t() on it - the assumption being
that it was the last blocker.
(4) When a cookie is released, the counter is decremented and the releaser
uses wait_on_atomic_t() to wait for the counter to become 0 - which should
indicate no one is using the netfs data any longer. The netfs data can
then be destroyed.
There are some alternatives that I have thought of and that have been suggested
by Tejun Heo:
(A) Using wait_on_bit() to wait on a bit in the counter. This doesn't work
because if that bit happens to be 0 then the wait won't happen - even if
the counter is non-zero.
(B) Using wait_on_bit() to wait on a flag elsewhere which is cleared when the
counter reaches 0. Such a flag would be redundant and would add
complexity.
(C) Adding a waitqueue to fscache_cookie - this would expand that struct by
several words for an event that happens just once in each cookie's
lifetime. Further, cookies are generally per-file so there are likely to
be a lot of them.
(D) Similar to (C), but add a pointer to a waitqueue in the cookie instead of
a waitqueue. This would add single word per cookie and so would be less
of an expansion - but still an expansion.
(E) Adding a static waitqueue to the fscache module. Generally this would be
fine, but under certain circumstances many cookies will all get added at
the same time (eg. NFS umount, cache withdrawal) thereby presenting
scaling issues. Note that the wait may be significant as disk I/O may be
in progress.
So, I think reusing the wait_on_bit() waitqueue set is reasonable. I don't
make much use of the waitqueue I need on a per-cookie basis, but sometimes I
have a huge flood of the cookies to deal with.
I also don't want to add a whole new set of global waitqueue tables
specifically for the dec-to-0 event if I can reuse the bit tables.
Signed-off-by: David Howells <dhowells@redhat.com>
Tested-By: Milosz Tanski <milosz@adfin.com>
Acked-by: Jeff Layton <jlayton@redhat.com>
2013-05-11 01:50:26 +07:00
|
|
|
/*
 * Initialiser for a struct wait_bit_key that refers to an atomic_t: @p goes
 * to ->flags and ->bit_nr is fixed to WAIT_ATOMIC_T_BIT_NR.
 */
#define __WAIT_ATOMIC_T_KEY_INITIALIZER(p) {				\
	.flags	= p,							\
	.bit_nr	= WAIT_ATOMIC_T_BIT_NR,					\
}
|
|
|
|
|
2011-12-13 19:20:54 +07:00
|
|
|
/*
 * Out-of-line worker behind init_waitqueue_head(): receives the queue head,
 * a name string and a lock_class_key.  Defined outside this header.
 */
extern void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct lock_class_key *);
|
2009-08-10 18:33:05 +07:00
|
|
|
|
|
|
|
/*
 * Run-time initialisation of the wait-queue head that @q points to.  Every
 * expansion site gets its own static lock_class_key, and the text of the @q
 * argument is passed along as the name string.
 */
#define init_waitqueue_head(q)						\
	do {								\
		static struct lock_class_key __key;			\
		__init_waitqueue_head((q), #q, &__key);			\
	} while (0)
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2006-10-30 13:46:36 +07:00
|
|
|
/*
 * Declaring a wait-queue head as a local variable.  Without CONFIG_LOCKDEP
 * this is the ordinary static initialiser; with it, the head is set up
 * through init_waitqueue_head() inside a statement expression that yields the
 * initialised object.
 */
#ifndef CONFIG_LOCKDEP
# define DECLARE_WAIT_QUEUE_HEAD_ONSTACK(name) DECLARE_WAIT_QUEUE_HEAD(name)
#else
# define __WAIT_QUEUE_HEAD_INIT_ONSTACK(name) \
	({ init_waitqueue_head(&name); name; })
# define DECLARE_WAIT_QUEUE_HEAD_ONSTACK(name) \
	wait_queue_head_t name = __WAIT_QUEUE_HEAD_INIT_ONSTACK(name)
#endif
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/*
 * Prepare @q as an entry for task @p: flags cleared, @p stored in ->private
 * and default_wake_function() installed as the callback.  ->task_list is not
 * touched.
 */
static inline void init_waitqueue_entry(wait_queue_t *q, struct task_struct *p)
{
	q->func    = default_wake_function;
	q->private = p;
	q->flags   = 0;
}
|
|
|
|
|
|
|
|
static inline void init_waitqueue_func_entry(wait_queue_t *q,
|
|
|
|
wait_queue_func_t func)
|
|
|
|
{
|
|
|
|
q->flags = 0;
|
2005-06-23 14:10:27 +07:00
|
|
|
q->private = NULL;
|
2005-04-17 05:20:36 +07:00
|
|
|
q->func = func;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Return 1 when at least one entry is linked on @q's task_list and 0 when the
 * list is empty.  The check is made without taking any lock.
 */
static inline int waitqueue_active(wait_queue_head_t *q)
{
	if (list_empty(&q->task_list))
		return 0;

	return 1;
}
|
|
|
|
|
2008-02-14 06:03:15 +07:00
|
|
|
/*
 * Out-of-line add/remove entry points; declared here, defined elsewhere.
 * NOTE(review): presumably the locked counterparts of the __-prefixed inline
 * helpers in this header - confirm against the implementation.
 */
extern void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait);
|
|
|
|
extern void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait);
|
|
|
|
extern void remove_wait_queue(wait_queue_head_t *q, wait_queue_t *wait);
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/*
 * Link @new onto @head's task_list with list_add().  No lock is taken and no
 * flags are changed here.
 */
static inline void __add_wait_queue(wait_queue_head_t *head, wait_queue_t *new)
{
	struct list_head *entry = &new->task_list;
	struct list_head *queue = &head->task_list;

	list_add(entry, queue);
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Used for wake-one threads:
|
|
|
|
*/
|
2010-05-07 13:33:26 +07:00
|
|
|
static inline void __add_wait_queue_exclusive(wait_queue_head_t *q,
|
|
|
|
wait_queue_t *wait)
|
|
|
|
{
|
|
|
|
wait->flags |= WQ_FLAG_EXCLUSIVE;
|
|
|
|
__add_wait_queue(q, wait);
|
|
|
|
}
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
static inline void __add_wait_queue_tail(wait_queue_head_t *head,
|
2010-05-07 13:33:26 +07:00
|
|
|
wait_queue_t *new)
|
2005-04-17 05:20:36 +07:00
|
|
|
{
|
|
|
|
list_add_tail(&new->task_list, &head->task_list);
|
|
|
|
}
|
|
|
|
|
2010-05-07 13:33:26 +07:00
|
|
|
static inline void __add_wait_queue_tail_exclusive(wait_queue_head_t *q,
|
|
|
|
wait_queue_t *wait)
|
|
|
|
{
|
|
|
|
wait->flags |= WQ_FLAG_EXCLUSIVE;
|
|
|
|
__add_wait_queue_tail(q, wait);
|
|
|
|
}
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
static inline void __remove_wait_queue(wait_queue_head_t *head,
|
|
|
|
wait_queue_t *old)
|
|
|
|
{
|
|
|
|
list_del(&old->task_list);
|
|
|
|
}
|
|
|
|
|
2008-02-14 06:03:15 +07:00
|
|
|
/*
 * Wake-up entry point that the wake_up(), wake_up_nr(), wake_up_all(),
 * wake_up_interruptible*() and *_poll() macros without _locked/_sync in their
 * name expand to.  Defined outside this header.
 */
void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key);
|
epoll keyed wakeups: add __wake_up_locked_key() and __wake_up_sync_key()
This patchset introduces wakeup hints for some of the most popular (from
epoll POV) devices, so that epoll code can avoid spurious wakeups on its
waiters.
The problem with epoll is that the callback-based wakeups do not, ATM,
carry any information about the events the wakeup is related to. So the
only choice epoll has (not being able to call f_op->poll() from inside the
callback), is to add the file* to a ready-list and resolve the real events
later on, at epoll_wait() (or its own f_op->poll()) time. This can cause
spurious wakeups, since the wake_up() itself might be for an event the
caller is not interested into.
The rate of these spurious wakeup can be pretty high in case of many
network sockets being monitored.
By allowing devices to report the events the wakeups refer to (at least
the two major classes - POLLIN/POLLOUT), we are able to spare useless
wakeups by proper handling inside the epoll's poll callback.
Epoll will have in any case to call f_op->poll() on the file* later on,
since the change to be done in order to have the full event set sent via
wakeup, is too invasive for the way our f_op->poll() system works (the
full event set is calculated inside the poll function - there are too many
of them to even start thinking the change - also poll/select would need
change too).
Epoll is changed in a way that both devices which send event hints, and
the ones that don't, are correctly handled. The former will gain some
efficiency though.
As a general rule for devices, would be to add an event mask by using
key-aware wakeup macros, when making up poll wait queues. I tested it
(together with the epoll's poll fix patch Andrew has in -mm) and wakeups
for the supported devices are correctly filtered.
Test program available here:
http://www.xmailserver.org/epoll_test.c
This patch:
Nothing revolutionary here. Just using the available "key" that our
wakeup core already support. The __wake_up_locked_key() was no brainer,
since both __wake_up_locked() and __wake_up_locked_key() are thin wrappers
around __wake_up_common().
The __wake_up_sync() function had a body, so the choice was between
borrowing the body for __wake_up_sync_key() and calling it from
__wake_up_sync(), or make an inline and calling it from both. I chose the
former since in most archs it all resolves to "mov $0, REG; jmp ADDR".
Signed-off-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: David Miller <davem@davemloft.net>
Cc: William Lee Irwin III <wli@movementarian.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2009-04-01 05:24:20 +07:00
|
|
|
/*
 * Keyed wake-up variants, defined outside this header.  The wake_up_*_poll()
 * macros pass their event mask, cast to void *, as @key.
 */
void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key);
|
|
|
|
void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr,
|
|
|
|
void *key);
|
2011-12-01 06:04:00 +07:00
|
|
|
/*
 * Target of wake_up_locked() and wake_up_all_locked(); defined elsewhere.
 * NOTE(review): by its name, presumably expects the caller to hold q->lock - confirm.
 */
void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr);
|
epoll keyed wakeups: add __wake_up_locked_key() and __wake_up_sync_key()
This patchset introduces wakeup hints for some of the most popular (from
epoll POV) devices, so that epoll code can avoid spurious wakeups on its
waiters.
The problem with epoll is that the callback-based wakeups do not, ATM,
carry any information about the events the wakeup is related to. So the
only choice epoll has (not being able to call f_op->poll() from inside the
callback), is to add the file* to a ready-list and resolve the real events
later on, at epoll_wait() (or its own f_op->poll()) time. This can cause
spurious wakeups, since the wake_up() itself might be for an event the
caller is not interested into.
The rate of these spurious wakeup can be pretty high in case of many
network sockets being monitored.
By allowing devices to report the events the wakeups refer to (at least
the two major classes - POLLIN/POLLOUT), we are able to spare useless
wakeups by proper handling inside the epoll's poll callback.
Epoll will have in any case to call f_op->poll() on the file* later on,
since the change to be done in order to have the full event set sent via
wakeup, is too invasive for the way our f_op->poll() system works (the
full event set is calculated inside the poll function - there are too many
of them to even start thinking the change - also poll/select would need
change too).
Epoll is changed in a way that both devices which send event hints, and
the ones that don't, are correctly handled. The former will gain some
efficiency though.
As a general rule for devices, would be to add an event mask by using
key-aware wakeup macros, when making up poll wait queues. I tested it
(together with the epoll's poll fix patch Andrew has in -mm) and wakeups
for the supported devices are correctly filtered.
Test program available here:
http://www.xmailserver.org/epoll_test.c
This patch:
Nothing revolutionary here. Just using the available "key" that our
wakeup core already support. The __wake_up_locked_key() was no brainer,
since both __wake_up_locked() and __wake_up_locked_key() are thin wrappers
around __wake_up_common().
The __wake_up_sync() function had a body, so the choice was between
borrowing the body for __wake_up_sync_key() and calling it from
__wake_up_sync(), or make an inline and calling it from both. I chose the
former since in most archs it all resolves to "mov $0, REG; jmp ADDR".
Signed-off-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: David Miller <davem@davemloft.net>
Cc: William Lee Irwin III <wli@movementarian.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2009-04-01 05:24:20 +07:00
|
|
|
/* Target of wake_up_interruptible_sync(); the non-keyed sibling of __wake_up_sync_key(). Defined elsewhere. */
void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr);
|
2008-02-14 06:03:15 +07:00
|
|
|
/*
 * Bit-wait primitives, defined outside this header.  The parameters are
 * unnamed; by type they are a queue head, an address with an int bit number
 * or a struct wait_bit_queue, an int (*)(void *) callback and an unsigned
 * value.
 * NOTE(review): presumably the callback performs the actual sleep and the
 * unsigned value is the task state to sleep in - confirm against the
 * implementation.
 */
void __wake_up_bit(wait_queue_head_t *, void *, int);
|
|
|
|
int __wait_on_bit(wait_queue_head_t *, struct wait_bit_queue *, int (*)(void *), unsigned);
|
|
|
|
int __wait_on_bit_lock(wait_queue_head_t *, struct wait_bit_queue *, int (*)(void *), unsigned);
|
|
|
|
void wake_up_bit(void *, int);
|
Add wait_on_atomic_t() and wake_up_atomic_t()
Add wait_on_atomic_t() and wake_up_atomic_t() to indicate became-zero events on
atomic_t types. This uses the bit-wake waitqueue table. The key is set to a
value outside of the number of bits in a long so that wait_on_bit() won't be
woken up accidentally.
What I'm using this for is: in a following patch I add a counter to struct
fscache_cookie to count the number of outstanding operations that need access
to netfs data. The way this works is:
(1) When a cookie is allocated, the counter is initialised to 1.
(2) When an operation wants to access netfs data, it calls atomic_inc_unless()
to increment the counter before it does so. If it was 0, then the counter
isn't incremented, the operation isn't permitted to access the netfs data
(which might by this point no longer exist) and the operation aborts in
some appropriate manner.
(3) When an operation finishes with the netfs data, it decrements the counter
and if it reaches 0, calls wake_up_atomic_t() on it - the assumption being
that it was the last blocker.
(4) When a cookie is released, the counter is decremented and the releaser
uses wait_on_atomic_t() to wait for the counter to become 0 - which should
indicate no one is using the netfs data any longer. The netfs data can
then be destroyed.
There are some alternatives that I have thought of and that have been suggested
by Tejun Heo:
(A) Using wait_on_bit() to wait on a bit in the counter. This doesn't work
because if that bit happens to be 0 then the wait won't happen - even if
the counter is non-zero.
(B) Using wait_on_bit() to wait on a flag elsewhere which is cleared when the
counter reaches 0. Such a flag would be redundant and would add
complexity.
(C) Adding a waitqueue to fscache_cookie - this would expand that struct by
several words for an event that happens just once in each cookie's
lifetime. Further, cookies are generally per-file so there are likely to
be a lot of them.
(D) Similar to (C), but add a pointer to a waitqueue in the cookie instead of
a waitqueue. This would add single word per cookie and so would be less
of an expansion - but still an expansion.
(E) Adding a static waitqueue to the fscache module. Generally this would be
fine, but under certain circumstances many cookies will all get added at
the same time (eg. NFS umount, cache withdrawal) thereby presenting
scaling issues. Note that the wait may be significant as disk I/O may be
in progress.
So, I think reusing the wait_on_bit() waitqueue set is reasonable. I don't
make much use of the waitqueue I need on a per-cookie basis, but sometimes I
have a huge flood of the cookies to deal with.
I also don't want to add a whole new set of global waitqueue tables
specifically for the dec-to-0 event if I can reuse the bit tables.
Signed-off-by: David Howells <dhowells@redhat.com>
Tested-By: Milosz Tanski <milosz@adfin.com>
Acked-by: Jeff Layton <jlayton@redhat.com>
2013-05-11 01:50:26 +07:00
|
|
|
/*
 * Wake waiters sleeping on an atomic_t; intended to be called once the
 * counter has been decremented to zero.  Defined outside this header.
 */
void wake_up_atomic_t(atomic_t *);
|
2008-02-14 06:03:15 +07:00
|
|
|
/*
 * Out-of-line bit waits taking the address and bit number directly, plus an
 * int (*)(void *) callback and an unsigned value.  Defined elsewhere.
 * NOTE(review): presumably wrappers around __wait_on_bit() and
 * __wait_on_bit_lock() - confirm.
 */
int out_of_line_wait_on_bit(void *, int, int (*)(void *), unsigned);
|
|
|
|
int out_of_line_wait_on_bit_lock(void *, int, int (*)(void *), unsigned);
|
Add wait_on_atomic_t() and wake_up_atomic_t()
Add wait_on_atomic_t() and wake_up_atomic_t() to indicate became-zero events on
atomic_t types. This uses the bit-wake waitqueue table. The key is set to a
value outside of the number of bits in a long so that wait_on_bit() won't be
woken up accidentally.
What I'm using this for is: in a following patch I add a counter to struct
fscache_cookie to count the number of outstanding operations that need access
to netfs data. The way this works is:
(1) When a cookie is allocated, the counter is initialised to 1.
(2) When an operation wants to access netfs data, it calls atomic_inc_unless()
to increment the counter before it does so. If it was 0, then the counter
isn't incremented, the operation isn't permitted to access the netfs data
(which might by this point no longer exist) and the operation aborts in
some appropriate manner.
(3) When an operation finishes with the netfs data, it decrements the counter
and if it reaches 0, calls wake_up_atomic_t() on it - the assumption being
that it was the last blocker.
(4) When a cookie is released, the counter is decremented and the releaser
uses wait_on_atomic_t() to wait for the counter to become 0 - which should
indicate no one is using the netfs data any longer. The netfs data can
then be destroyed.
There are some alternatives that I have thought of and that have been suggested
by Tejun Heo:
(A) Using wait_on_bit() to wait on a bit in the counter. This doesn't work
because if that bit happens to be 0 then the wait won't happen - even if
the counter is non-zero.
(B) Using wait_on_bit() to wait on a flag elsewhere which is cleared when the
counter reaches 0. Such a flag would be redundant and would add
complexity.
(C) Adding a waitqueue to fscache_cookie - this would expand that struct by
several words for an event that happens just once in each cookie's
lifetime. Further, cookies are generally per-file so there are likely to
be a lot of them.
(D) Similar to (C), but add a pointer to a waitqueue in the cookie instead of
a waitqueue. This would add single word per cookie and so would be less
of an expansion - but still an expansion.
(E) Adding a static waitqueue to the fscache module. Generally this would be
fine, but under certain circumstances many cookies will all get added at
the same time (eg. NFS umount, cache withdrawal) thereby presenting
scaling issues. Note that the wait may be significant as disk I/O may be
in progress.
So, I think reusing the wait_on_bit() waitqueue set is reasonable. I don't
make much use of the waitqueue I need on a per-cookie basis, but sometimes I
have a huge flood of the cookies to deal with.
I also don't want to add a whole new set of global waitqueue tables
specifically for the dec-to-0 event if I can reuse the bit tables.
Signed-off-by: David Howells <dhowells@redhat.com>
Tested-By: Milosz Tanski <milosz@adfin.com>
Acked-by: Jeff Layton <jlayton@redhat.com>
2013-05-11 01:50:26 +07:00
|
|
|
/*
 * Out-of-line wait on an atomic_t, the counterpart of wake_up_atomic_t().
 * Takes the counter, an int (*)(atomic_t *) callback and an unsigned value.
 * Defined outside this header.
 */
int out_of_line_wait_on_atomic_t(atomic_t *, int (*)(atomic_t *), unsigned);
|
2008-02-14 06:03:15 +07:00
|
|
|
/*
 * Return the wait-queue head to use for a given address and bit number.
 * NOTE(review): presumably a lookup in a shared table of queue heads - confirm.
 */
wait_queue_head_t *bit_waitqueue(void *, int);
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2007-12-07 05:34:36 +07:00
|
|
|
/*
 * TASK_NORMAL wake-ups on queue head @x with a NULL key; they differ only in
 * the count passed to __wake_up(): 1, @nr, or 0 for the _all form.
 */
#define wake_up(x)		__wake_up((x), TASK_NORMAL, 1, NULL)
#define wake_up_nr(x, nr)	__wake_up((x), TASK_NORMAL, (nr), NULL)
#define wake_up_all(x)		__wake_up((x), TASK_NORMAL, 0, NULL)
|
2011-12-01 06:04:00 +07:00
|
|
|
/* TASK_NORMAL wake-ups routed through __wake_up_locked(): count 1, or 0 for the _all form. */
#define wake_up_locked(x)	__wake_up_locked((x), TASK_NORMAL, 1)
#define wake_up_all_locked(x)	__wake_up_locked((x), TASK_NORMAL, 0)
|
2007-12-07 05:34:36 +07:00
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/*
 * Same shape as wake_up()/wake_up_nr()/wake_up_all(), but with mode
 * TASK_INTERRUPTIBLE instead of TASK_NORMAL.
 */
#define wake_up_interruptible(x)	 __wake_up((x), TASK_INTERRUPTIBLE, 1, NULL)
#define wake_up_interruptible_nr(x, nr)	 __wake_up((x), TASK_INTERRUPTIBLE, (nr), NULL)
#define wake_up_interruptible_all(x)	 __wake_up((x), TASK_INTERRUPTIBLE, 0, NULL)
|
2007-12-07 05:34:36 +07:00
|
|
|
/* TASK_INTERRUPTIBLE wake-up with a count of 1, routed through __wake_up_sync(). */
#define wake_up_interruptible_sync(x) __wake_up_sync((x), TASK_INTERRUPTIBLE, 1)
|
2005-04-17 05:20:36 +07:00
|
|
|
|
lockdep: annotate epoll
On Sat, 2008-01-05 at 13:35 -0800, Davide Libenzi wrote:
> I remember I talked with Arjan about this time ago. Basically, since 1)
> you can drop an epoll fd inside another epoll fd 2) callback-based wakeups
> are used, you can see a wake_up() from inside another wake_up(), but they
> will never refer to the same lock instance.
> Think about:
>
> dfd = socket(...);
> efd1 = epoll_create();
> efd2 = epoll_create();
> epoll_ctl(efd1, EPOLL_CTL_ADD, dfd, ...);
> epoll_ctl(efd2, EPOLL_CTL_ADD, efd1, ...);
>
> When a packet arrives to the device underneath "dfd", the net code will
> issue a wake_up() on its poll wake list. Epoll (efd1) has installed a
> callback wakeup entry on that queue, and the wake_up() performed by the
> "dfd" net code will end up in ep_poll_callback(). At this point epoll
> (efd1) notices that it may have some event ready, so it needs to wake up
> the waiters on its poll wait list (efd2). So it calls ep_poll_safewake()
> that ends up in another wake_up(), after having checked about the
> recursion constraints. That are, no more than EP_MAX_POLLWAKE_NESTS, to
> avoid stack blasting. Never hit the same queue, to avoid loops like:
>
> epoll_ctl(efd2, EPOLL_CTL_ADD, efd1, ...);
> epoll_ctl(efd3, EPOLL_CTL_ADD, efd2, ...);
> epoll_ctl(efd4, EPOLL_CTL_ADD, efd3, ...);
> epoll_ctl(efd1, EPOLL_CTL_ADD, efd4, ...);
>
> The code "if (tncur->wq == wq || ..." prevents re-entering the same
> queue/lock.
Since the epoll code is very careful to not nest same instance locks
allow the recursion.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Tested-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-02-05 13:27:20 +07:00
|
|
|
/*
|
2009-04-01 05:24:20 +07:00
|
|
|
* Wakeup macros to be used to report events to the targets.
|
lockdep: annotate epoll
On Sat, 2008-01-05 at 13:35 -0800, Davide Libenzi wrote:
> I remember I talked with Arjan about this time ago. Basically, since 1)
> you can drop an epoll fd inside another epoll fd 2) callback-based wakeups
> are used, you can see a wake_up() from inside another wake_up(), but they
> will never refer to the same lock instance.
> Think about:
>
> dfd = socket(...);
> efd1 = epoll_create();
> efd2 = epoll_create();
> epoll_ctl(efd1, EPOLL_CTL_ADD, dfd, ...);
> epoll_ctl(efd2, EPOLL_CTL_ADD, efd1, ...);
>
> When a packet arrives to the device underneath "dfd", the net code will
> issue a wake_up() on its poll wake list. Epoll (efd1) has installed a
> callback wakeup entry on that queue, and the wake_up() performed by the
> "dfd" net code will end up in ep_poll_callback(). At this point epoll
> (efd1) notices that it may have some event ready, so it needs to wake up
> the waiters on its poll wait list (efd2). So it calls ep_poll_safewake()
> that ends up in another wake_up(), after having checked about the
> recursion constraints. That are, no more than EP_MAX_POLLWAKE_NESTS, to
> avoid stack blasting. Never hit the same queue, to avoid loops like:
>
> epoll_ctl(efd2, EPOLL_CTL_ADD, efd1, ...);
> epoll_ctl(efd3, EPOLL_CTL_ADD, efd2, ...);
> epoll_ctl(efd4, EPOLL_CTL_ADD, efd3, ...);
> epoll_ctl(efd1, EPOLL_CTL_ADD, efd4, ...);
>
> The code "if (tncur->wq == wq || ..." prevents re-entering the same
> queue/lock.
Since the epoll code is very careful to not nest same instance locks
allow the recursion.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Tested-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-02-05 13:27:20 +07:00
|
|
|
*/
|
2009-04-01 05:24:20 +07:00
|
|
|
/*
 * Keyed wake-ups: the event mask @m is cast to void * and handed to the
 * wake-up function as its key argument.  All four use a count of 1 where the
 * target function takes one.
 */
#define wake_up_poll(x, m)						\
	__wake_up((x), TASK_NORMAL, 1, (void *) (m))
#define wake_up_locked_poll(x, m)					\
	__wake_up_locked_key((x), TASK_NORMAL, (void *) (m))
#define wake_up_interruptible_poll(x, m)				\
	__wake_up((x), TASK_INTERRUPTIBLE, 1, (void *) (m))
#define wake_up_interruptible_sync_poll(x, m)				\
	__wake_up_sync_key((x), TASK_INTERRUPTIBLE, 1, (void *) (m))
|
lockdep: annotate epoll
On Sat, 2008-01-05 at 13:35 -0800, Davide Libenzi wrote:
> I remember I talked with Arjan about this time ago. Basically, since 1)
> you can drop an epoll fd inside another epoll fd 2) callback-based wakeups
> are used, you can see a wake_up() from inside another wake_up(), but they
> will never refer to the same lock instance.
> Think about:
>
> dfd = socket(...);
> efd1 = epoll_create();
> efd2 = epoll_create();
> epoll_ctl(efd1, EPOLL_CTL_ADD, dfd, ...);
> epoll_ctl(efd2, EPOLL_CTL_ADD, efd1, ...);
>
> When a packet arrives to the device underneath "dfd", the net code will
> issue a wake_up() on its poll wake list. Epoll (efd1) has installed a
> callback wakeup entry on that queue, and the wake_up() performed by the
> "dfd" net code will end up in ep_poll_callback(). At this point epoll
> (efd1) notices that it may have some event ready, so it needs to wake up
> the waiters on its poll wait list (efd2). So it calls ep_poll_safewake()
> that ends up in another wake_up(), after having checked about the
> recursion constraints. That are, no more than EP_MAX_POLLWAKE_NESTS, to
> avoid stack blasting. Never hit the same queue, to avoid loops like:
>
> epoll_ctl(efd2, EPOLL_CTL_ADD, efd1, ...);
> epoll_ctl(efd3, EPOLL_CTL_ADD, efd2, ...);
> epoll_ctl(efd4, EPOLL_CTL_ADD, efd3, ...);
> epoll_ctl(efd1, EPOLL_CTL_ADD, efd4, ...);
>
> The code "if (tncur->wq == wq || ..." prevents re-entering the same
> queue/lock.
Since the epoll code is very careful to not nest same instance locks
allow the recursion.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Tested-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-02-05 13:27:20 +07:00
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/*
 * Slow path of wait_event().  Each round first calls prepare_to_wait() with
 * TASK_UNINTERRUPTIBLE and only then tests @condition; while the test fails
 * the task calls schedule() and starts another round.  finish_wait() is
 * called once on the way out.
 */
#define __wait_event(wq, condition)					\
do {									\
	DEFINE_WAIT(__wait);						\
									\
	while (1) {							\
		prepare_to_wait(&wq, &__wait, TASK_UNINTERRUPTIBLE);	\
		if (!(condition)) {					\
			schedule();					\
			continue;					\
		}							\
		break;							\
	}								\
	finish_wait(&wq, &__wait);					\
} while (0)
|
|
|
|
|
|
|
|
/**
|
|
|
|
* wait_event - sleep until a condition gets true
|
|
|
|
* @wq: the waitqueue to wait on
|
|
|
|
* @condition: a C expression for the event to wait for
|
|
|
|
*
|
|
|
|
* The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
|
|
|
|
* @condition evaluates to true. The @condition is checked each time
|
|
|
|
* the waitqueue @wq is woken up.
|
|
|
|
*
|
|
|
|
* wake_up() has to be called after changing any variable that could
|
|
|
|
* change the result of the wait condition.
|
|
|
|
*/
|
|
|
|
#define wait_event(wq, condition)					\
do {									\
	/* only take the sleeping slow path if not already true */	\
	if (!(condition))						\
		__wait_event(wq, condition);				\
} while (0)
|
|
|
|
|
|
|
|
/*
 * __wait_event_timeout - slow path of wait_event_timeout()
 *
 * @ret is an lvalue: it holds the timeout passed to schedule_timeout() on
 * entry and is overwritten with the result.  It ends up 0 only if
 * schedule_timeout() returned 0 and @condition is still false.
 * Macro arguments are parenthesized so compound expressions expand safely.
 */
#define __wait_event_timeout(wq, condition, ret)			\
do {									\
	DEFINE_WAIT(__wait);						\
									\
	for (;;) {							\
		prepare_to_wait(&(wq), &__wait, TASK_UNINTERRUPTIBLE);	\
		if (condition)						\
			break;						\
		(ret) = schedule_timeout(ret);				\
		if (!(ret))						\
			break;						\
	}								\
	/* time ran out, but the condition may have become true */	\
	if (!(ret) && (condition))					\
		(ret) = 1;						\
	finish_wait(&(wq), &__wait);					\
} while (0)
|
|
|
|
|
|
|
|
/**
 * wait_event_timeout - sleep until a condition gets true or a timeout elapses
 * @wq: the waitqueue to wait on
 * @condition: a C expression for the event to wait for
 * @timeout: timeout, in jiffies
 *
 * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
 * @condition evaluates to true. The @condition is checked each time
 * the waitqueue @wq is woken up.
 *
 * wake_up() has to be called after changing any variable that could
 * change the result of the wait condition.
 *
 * The function returns 0 if the @timeout elapsed and the @condition is
 * still false, 1 if the @condition is true but no time is left (this
 * includes a @timeout of 0 with the @condition already true), or the
 * remaining jiffies (at least 1) if the @condition evaluated to %true
 * before the @timeout elapsed.
 */
#define wait_event_timeout(wq, condition, timeout)			\
({									\
	long __ret = (timeout);						\
	if (!(condition))						\
		__wait_event_timeout(wq, condition, __ret);		\
	else if (!__ret)						\
		__ret = 1; /* true condition is never a timeout */	\
	__ret;								\
})
|
|
|
|
|
|
|
|
/*
 * __wait_event_interruptible - slow path of wait_event_interruptible()
 *
 * Loops in TASK_INTERRUPTIBLE until @condition evaluates to true.  If
 * signal_pending(current) is seen while @condition is false, -ERESTARTSYS
 * is stored in the lvalue @ret and the loop ends; otherwise @ret is left
 * untouched.  Macro arguments are parenthesized so compound expressions
 * expand safely.
 */
#define __wait_event_interruptible(wq, condition, ret)			\
do {									\
	DEFINE_WAIT(__wait);						\
									\
	for (;;) {							\
		prepare_to_wait(&(wq), &__wait, TASK_INTERRUPTIBLE);	\
		if (condition)						\
			break;						\
		if (!signal_pending(current)) {				\
			schedule();					\
			continue;					\
		}							\
		(ret) = -ERESTARTSYS;					\
		break;							\
	}								\
	finish_wait(&(wq), &__wait);					\
} while (0)
|
|
|
|
|
|
|
|
/**
|
|
|
|
* wait_event_interruptible - sleep until a condition gets true
|
|
|
|
* @wq: the waitqueue to wait on
|
|
|
|
* @condition: a C expression for the event to wait for
|
|
|
|
*
|
|
|
|
* The process is put to sleep (TASK_INTERRUPTIBLE) until the
|
|
|
|
* @condition evaluates to true or a signal is received.
|
|
|
|
* The @condition is checked each time the waitqueue @wq is woken up.
|
|
|
|
*
|
|
|
|
* wake_up() has to be called after changing any variable that could
|
|
|
|
* change the result of the wait condition.
|
|
|
|
*
|
|
|
|
* The function will return -ERESTARTSYS if it was interrupted by a
|
|
|
|
* signal and 0 if @condition evaluated to true.
|
|
|
|
*/
|
|
|
|
#define wait_event_interruptible(wq, condition)				\
({									\
	int __ret = 0;	/* stays 0 unless the slow path stores an error */ \
	if (!(condition)) {						\
		__wait_event_interruptible(wq, condition, __ret);	\
	}								\
	__ret;								\
})
|
|
|
|
|
|
|
|
/*
 * __wait_event_interruptible_timeout - slow path of
 * wait_event_interruptible_timeout()
 *
 * @ret is an lvalue: it holds the timeout passed to schedule_timeout() on
 * entry and is overwritten with the result (-ERESTARTSYS when
 * signal_pending(current) is seen while @condition is false).
 * Macro arguments are parenthesized so compound expressions expand safely.
 */
#define __wait_event_interruptible_timeout(wq, condition, ret)		\
do {									\
	DEFINE_WAIT(__wait);						\
									\
	for (;;) {							\
		prepare_to_wait(&(wq), &__wait, TASK_INTERRUPTIBLE);	\
		if (condition)						\
			break;						\
		if (!signal_pending(current)) {				\
			(ret) = schedule_timeout(ret);			\
			if (!(ret))					\
				break;					\
			continue;					\
		}							\
		(ret) = -ERESTARTSYS;					\
		break;							\
	}								\
	/* time ran out, but the condition may have become true */	\
	if (!(ret) && (condition))					\
		(ret) = 1;						\
	finish_wait(&(wq), &__wait);					\
} while (0)
|
|
|
|
|
|
|
|
/**
 * wait_event_interruptible_timeout - sleep until a condition gets true or a timeout elapses
 * @wq: the waitqueue to wait on
 * @condition: a C expression for the event to wait for
 * @timeout: timeout, in jiffies
 *
 * The process is put to sleep (TASK_INTERRUPTIBLE) until the
 * @condition evaluates to true or a signal is received.
 * The @condition is checked each time the waitqueue @wq is woken up.
 *
 * wake_up() has to be called after changing any variable that could
 * change the result of the wait condition.
 *
 * Returns:
 * 0 if the @timeout elapsed and the @condition is still false,
 * -%ERESTARTSYS if it was interrupted by a signal, 1 if the @condition
 * is true but no time is left (this includes a @timeout of 0 with the
 * @condition already true), or the remaining jiffies (at least 1) if
 * the @condition evaluated to %true before the @timeout elapsed.
 */
#define wait_event_interruptible_timeout(wq, condition, timeout)	\
({									\
	long __ret = (timeout);						\
	if (!(condition))						\
		__wait_event_interruptible_timeout(wq, condition, __ret); \
	else if (!__ret)						\
		__ret = 1; /* true condition is never a timeout */	\
	__ret;								\
})
|
|
|
|
|
2013-05-08 06:18:43 +07:00
|
|
|
/*
 * __wait_event_hrtimeout - slow path of the wait_event*_hrtimeout() macros
 *
 * Sets up an on-stack hrtimer sleeper for the current task (started only
 * when @timeout is not KTIME_MAX) and loops in task state @state until
 * @condition evaluates to true.  Evaluates to 0 in that case, to
 * -ERESTARTSYS when @state is TASK_INTERRUPTIBLE and a signal is pending,
 * or to -ETIME when the sleeper's task pointer is found to be NULL
 * (presumably cleared when the timer fires - confirm against the hrtimer
 * sleeper implementation).
 *
 * @wq, @timeout and @state are parenthesized so compound expressions
 * (e.g. a @state built with '|') expand with the intended precedence.
 */
#define __wait_event_hrtimeout(wq, condition, timeout, state)		\
({									\
	int __ret = 0;							\
	DEFINE_WAIT(__wait);						\
	struct hrtimer_sleeper __t;					\
									\
	hrtimer_init_on_stack(&__t.timer, CLOCK_MONOTONIC,		\
			      HRTIMER_MODE_REL);			\
	hrtimer_init_sleeper(&__t, current);				\
	if ((timeout).tv64 != KTIME_MAX)				\
		hrtimer_start_range_ns(&__t.timer, (timeout),		\
				       current->timer_slack_ns,		\
				       HRTIMER_MODE_REL);		\
									\
	for (;;) {							\
		prepare_to_wait(&(wq), &__wait, (state));		\
		if (condition)						\
			break;						\
		if ((state) == TASK_INTERRUPTIBLE &&			\
		    signal_pending(current)) {				\
			__ret = -ERESTARTSYS;				\
			break;						\
		}							\
		if (!__t.task) {					\
			__ret = -ETIME;					\
			break;						\
		}							\
		schedule();						\
	}								\
									\
	hrtimer_cancel(&__t.timer);					\
	destroy_hrtimer_on_stack(&__t.timer);				\
	finish_wait(&(wq), &__wait);					\
	__ret;								\
})
|
|
|
|
|
|
|
|
/**
|
|
|
|
* wait_event_hrtimeout - sleep until a condition gets true or a timeout elapses
|
|
|
|
* @wq: the waitqueue to wait on
|
|
|
|
* @condition: a C expression for the event to wait for
|
|
|
|
* @timeout: timeout, as a ktime_t
|
|
|
|
*
|
|
|
|
* The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
|
|
|
|
* @condition evaluates to true or the @timeout elapses.
|
|
|
|
* The @condition is checked each time the waitqueue @wq is woken up.
|
|
|
|
*
|
|
|
|
* wake_up() has to be called after changing any variable that could
|
|
|
|
* change the result of the wait condition.
|
|
|
|
*
|
|
|
|
* The function returns 0 if @condition became true, or -ETIME if the timeout
|
|
|
|
* elapsed.
|
|
|
|
*/
|
|
|
|
#define wait_event_hrtimeout(wq, condition, timeout)			\
({									\
	int __ret = 0;	/* 0: condition already true, no sleep needed */ \
	if (!(condition)) {						\
		__ret = __wait_event_hrtimeout(wq, condition, timeout,	\
					       TASK_UNINTERRUPTIBLE);	\
	}								\
	__ret;								\
})
|
|
|
|
|
|
|
|
/**
|
|
|
|
* wait_event_interruptible_hrtimeout - sleep until a condition gets true or a timeout elapses
|
|
|
|
* @wq: the waitqueue to wait on
|
|
|
|
* @condition: a C expression for the event to wait for
|
|
|
|
* @timeout: timeout, as a ktime_t
|
|
|
|
*
|
|
|
|
* The process is put to sleep (TASK_INTERRUPTIBLE) until the
|
|
|
|
* @condition evaluates to true or a signal is received.
|
|
|
|
* The @condition is checked each time the waitqueue @wq is woken up.
|
|
|
|
*
|
|
|
|
* wake_up() has to be called after changing any variable that could
|
|
|
|
* change the result of the wait condition.
|
|
|
|
*
|
|
|
|
* The function returns 0 if @condition became true, -ERESTARTSYS if it was
|
|
|
|
* interrupted by a signal, or -ETIME if the timeout elapsed.
|
|
|
|
*/
|
|
|
|
#define wait_event_interruptible_hrtimeout(wq, condition, timeout)	\
({									\
	long __ret = 0;	/* 0: condition already true, no sleep needed */ \
	if (!(condition)) {						\
		__ret = __wait_event_hrtimeout(wq, condition, timeout,	\
					       TASK_INTERRUPTIBLE);	\
	}								\
	__ret;								\
})
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/*
 * __wait_event_interruptible_exclusive - slow path of
 * wait_event_interruptible_exclusive()
 *
 * Like the interruptible wait loop, but queues through
 * prepare_to_wait_exclusive().  On success finish_wait() is called inside
 * the loop; when a signal is pending, -ERESTARTSYS is stored in the lvalue
 * @ret and the wait is torn down with abort_exclusive_wait() instead.
 * Macro arguments are parenthesized so compound expressions expand safely.
 */
#define __wait_event_interruptible_exclusive(wq, condition, ret)	\
do {									\
	DEFINE_WAIT(__wait);						\
									\
	for (;;) {							\
		prepare_to_wait_exclusive(&(wq), &__wait,		\
					  TASK_INTERRUPTIBLE);		\
		if (condition) {					\
			finish_wait(&(wq), &__wait);			\
			break;						\
		}							\
		if (!signal_pending(current)) {				\
			schedule();					\
			continue;					\
		}							\
		(ret) = -ERESTARTSYS;					\
		abort_exclusive_wait(&(wq), &__wait,			\
				     TASK_INTERRUPTIBLE, NULL);		\
		break;							\
	}								\
} while (0)
|
|
|
|
|
|
|
|
/*
 * wait_event_interruptible_exclusive - interruptible wait using the
 * exclusive slow path.  Evaluates to 0 when @condition is (or becomes)
 * true, or to the error the slow path stored in __ret.
 */
#define wait_event_interruptible_exclusive(wq, condition)		\
({									\
	int __ret = 0;							\
	if (!(condition)) {						\
		__wait_event_interruptible_exclusive(wq, condition,	\
						     __ret);		\
	}								\
	__ret;								\
})
|
|
|
|
|
2010-05-05 17:53:11 +07:00
|
|
|
|
|
|
|
/*
 * __wait_event_interruptible_locked - slow path of the
 * wait_event_interruptible*_locked*() macros
 *
 * Runs with (wq).lock held; the lock is dropped only around schedule()
 * (with the _irq variants when @irq is non-zero) and is held again when
 * @condition is tested.  @exclusive selects WQ_FLAG_EXCLUSIVE on the
 * on-stack wait entry.  Evaluates to 0, or -ERESTARTSYS if a signal is
 * pending before going to sleep.
 */
#define __wait_event_interruptible_locked(wq, condition, exclusive, irq) \
({									\
	int __ret = 0;							\
	DEFINE_WAIT(__wait);						\
	if (exclusive) {						\
		__wait.flags |= WQ_FLAG_EXCLUSIVE;			\
	}								\
	for (;;) {							\
		/* (re)queue ourselves if the entry is not on a list */	\
		if (likely(list_empty(&__wait.task_list))) {		\
			__add_wait_queue_tail(&(wq), &__wait);		\
		}							\
		set_current_state(TASK_INTERRUPTIBLE);			\
		if (signal_pending(current)) {				\
			__ret = -ERESTARTSYS;				\
			break;						\
		}							\
		if (irq) {						\
			spin_unlock_irq(&(wq).lock);			\
		} else {						\
			spin_unlock(&(wq).lock);			\
		}							\
		schedule();						\
		if (irq) {						\
			spin_lock_irq(&(wq).lock);			\
		} else {						\
			spin_lock(&(wq).lock);				\
		}							\
		if (condition)						\
			break;						\
	}								\
	__remove_wait_queue(&(wq), &__wait);				\
	__set_current_state(TASK_RUNNING);				\
	__ret;								\
})
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* wait_event_interruptible_locked - sleep until a condition gets true
|
|
|
|
* @wq: the waitqueue to wait on
|
|
|
|
* @condition: a C expression for the event to wait for
|
|
|
|
*
|
|
|
|
* The process is put to sleep (TASK_INTERRUPTIBLE) until the
|
|
|
|
* @condition evaluates to true or a signal is received.
|
|
|
|
* The @condition is checked each time the waitqueue @wq is woken up.
|
|
|
|
*
|
|
|
|
* It must be called with wq.lock being held. This spinlock is
|
|
|
|
* unlocked while sleeping but @condition testing is done while lock
|
|
|
|
* is held and when this macro exits the lock is held.
|
|
|
|
*
|
|
|
|
* The lock is locked/unlocked using spin_lock()/spin_unlock()
|
|
|
|
* functions which must match the way they are locked/unlocked outside
|
|
|
|
* of this macro.
|
|
|
|
*
|
|
|
|
* wake_up_locked() has to be called after changing any variable that could
|
|
|
|
* change the result of the wait condition.
|
|
|
|
*
|
|
|
|
* The function will return -ERESTARTSYS if it was interrupted by a
|
|
|
|
* signal and 0 if @condition evaluated to true.
|
|
|
|
*/
|
|
|
|
#define wait_event_interruptible_locked(wq, condition)			\
	(!(condition)							\
	 ? __wait_event_interruptible_locked(wq, condition, 0, 0)	\
	 : 0)
|
|
|
|
|
|
|
|
/**
|
|
|
|
* wait_event_interruptible_locked_irq - sleep until a condition gets true
|
|
|
|
* @wq: the waitqueue to wait on
|
|
|
|
* @condition: a C expression for the event to wait for
|
|
|
|
*
|
|
|
|
* The process is put to sleep (TASK_INTERRUPTIBLE) until the
|
|
|
|
* @condition evaluates to true or a signal is received.
|
|
|
|
* The @condition is checked each time the waitqueue @wq is woken up.
|
|
|
|
*
|
|
|
|
* It must be called with wq.lock being held. This spinlock is
|
|
|
|
* unlocked while sleeping but @condition testing is done while lock
|
|
|
|
* is held and when this macro exits the lock is held.
|
|
|
|
*
|
|
|
|
* The lock is locked/unlocked using spin_lock_irq()/spin_unlock_irq()
|
|
|
|
* functions which must match the way they are locked/unlocked outside
|
|
|
|
* of this macro.
|
|
|
|
*
|
|
|
|
* wake_up_locked() has to be called after changing any variable that could
|
|
|
|
* change the result of the wait condition.
|
|
|
|
*
|
|
|
|
* The function will return -ERESTARTSYS if it was interrupted by a
|
|
|
|
* signal and 0 if @condition evaluated to true.
|
|
|
|
*/
|
|
|
|
#define wait_event_interruptible_locked_irq(wq, condition)		\
	(!(condition)							\
	 ? __wait_event_interruptible_locked(wq, condition, 0, 1)	\
	 : 0)
|
|
|
|
|
|
|
|
/**
|
|
|
|
* wait_event_interruptible_exclusive_locked - sleep exclusively until a condition gets true
|
|
|
|
* @wq: the waitqueue to wait on
|
|
|
|
* @condition: a C expression for the event to wait for
|
|
|
|
*
|
|
|
|
* The process is put to sleep (TASK_INTERRUPTIBLE) until the
|
|
|
|
* @condition evaluates to true or a signal is received.
|
|
|
|
* The @condition is checked each time the waitqueue @wq is woken up.
|
|
|
|
*
|
|
|
|
* It must be called with wq.lock being held. This spinlock is
|
|
|
|
* unlocked while sleeping but @condition testing is done while lock
|
|
|
|
* is held and when this macro exits the lock is held.
|
|
|
|
*
|
|
|
|
* The lock is locked/unlocked using spin_lock()/spin_unlock()
|
|
|
|
* functions which must match the way they are locked/unlocked outside
|
|
|
|
* of this macro.
|
|
|
|
*
|
|
|
|
* The process is put on the wait queue with the WQ_FLAG_EXCLUSIVE flag
* set, so when other processes are also waiting on the list and this
* process is woken up, further processes are not considered.
|
|
|
|
*
|
|
|
|
* wake_up_locked() has to be called after changing any variable that could
|
|
|
|
* change the result of the wait condition.
|
|
|
|
*
|
|
|
|
* The function will return -ERESTARTSYS if it was interrupted by a
|
|
|
|
* signal and 0 if @condition evaluated to true.
|
|
|
|
*/
|
|
|
|
#define wait_event_interruptible_exclusive_locked(wq, condition)	\
	(!(condition)							\
	 ? __wait_event_interruptible_locked(wq, condition, 1, 0)	\
	 : 0)
|
|
|
|
|
|
|
|
/**
|
|
|
|
* wait_event_interruptible_exclusive_locked_irq - sleep exclusively until a condition gets true
|
|
|
|
* @wq: the waitqueue to wait on
|
|
|
|
* @condition: a C expression for the event to wait for
|
|
|
|
*
|
|
|
|
* The process is put to sleep (TASK_INTERRUPTIBLE) until the
|
|
|
|
* @condition evaluates to true or a signal is received.
|
|
|
|
* The @condition is checked each time the waitqueue @wq is woken up.
|
|
|
|
*
|
|
|
|
* It must be called with wq.lock being held. This spinlock is
|
|
|
|
* unlocked while sleeping but @condition testing is done while lock
|
|
|
|
* is held and when this macro exits the lock is held.
|
|
|
|
*
|
|
|
|
* The lock is locked/unlocked using spin_lock_irq()/spin_unlock_irq()
|
|
|
|
* functions which must match the way they are locked/unlocked outside
|
|
|
|
* of this macro.
|
|
|
|
*
|
|
|
|
* The process is put on the wait queue with the WQ_FLAG_EXCLUSIVE flag
* set, so when other processes are also waiting on the list and this
* process is woken up, further processes are not considered.
|
|
|
|
*
|
|
|
|
* wake_up_locked() has to be called after changing any variable that could
|
|
|
|
* change the result of the wait condition.
|
|
|
|
*
|
|
|
|
* The function will return -ERESTARTSYS if it was interrupted by a
|
|
|
|
* signal and 0 if @condition evaluated to true.
|
|
|
|
*/
|
|
|
|
#define wait_event_interruptible_exclusive_locked_irq(wq, condition)	\
	(!(condition)							\
	 ? __wait_event_interruptible_locked(wq, condition, 1, 1)	\
	 : 0)
|
|
|
|
|
|
|
|
|
|
|
|
|
2007-12-07 00:00:00 +07:00
|
|
|
/*
 * __wait_event_killable - slow path of wait_event_killable()
 *
 * Loops in TASK_KILLABLE until @condition evaluates to true.  If
 * fatal_signal_pending(current) is seen while @condition is false,
 * -ERESTARTSYS is stored in the lvalue @ret and the loop ends.
 * Macro arguments are parenthesized so compound expressions expand safely.
 */
#define __wait_event_killable(wq, condition, ret)			\
do {									\
	DEFINE_WAIT(__wait);						\
									\
	for (;;) {							\
		prepare_to_wait(&(wq), &__wait, TASK_KILLABLE);		\
		if (condition)						\
			break;						\
		if (!fatal_signal_pending(current)) {			\
			schedule();					\
			continue;					\
		}							\
		(ret) = -ERESTARTSYS;					\
		break;							\
	}								\
	finish_wait(&(wq), &__wait);					\
} while (0)
|
|
|
|
|
|
|
|
/**
|
|
|
|
* wait_event_killable - sleep until a condition gets true
|
|
|
|
* @wq: the waitqueue to wait on
|
|
|
|
* @condition: a C expression for the event to wait for
|
|
|
|
*
|
|
|
|
* The process is put to sleep (TASK_KILLABLE) until the
|
|
|
|
* @condition evaluates to true or a fatal signal is received.
|
|
|
|
* The @condition is checked each time the waitqueue @wq is woken up.
|
|
|
|
*
|
|
|
|
* wake_up() has to be called after changing any variable that could
|
|
|
|
* change the result of the wait condition.
|
|
|
|
*
|
|
|
|
* The function will return -ERESTARTSYS if it was interrupted by a fatal
|
|
|
|
* signal and 0 if @condition evaluated to true.
|
|
|
|
*/
|
|
|
|
#define wait_event_killable(wq, condition)				\
({									\
	int __ret = 0;	/* stays 0 unless the slow path stores an error */ \
	if (!(condition)) {						\
		__wait_event_killable(wq, condition, __ret);		\
	}								\
	__ret;								\
})
|
|
|
|
|
2012-11-30 17:42:40 +07:00
|
|
|
|
|
|
|
/*
 * __wait_event_lock_irq - slow path of wait_event_lock_irq{,_cmd}()
 *
 * Entered with @lock held.  @condition is tested after prepare_to_wait()
 * while the lock is held; the lock is released with spin_unlock_irq()
 * before @cmd and schedule() run and retaken with spin_lock_irq()
 * afterwards.  @wq and @lock are parenthesized so any lvalue expression
 * may be passed.
 */
#define __wait_event_lock_irq(wq, condition, lock, cmd)			\
do {									\
	DEFINE_WAIT(__wait);						\
									\
	for (;;) {							\
		prepare_to_wait(&(wq), &__wait, TASK_UNINTERRUPTIBLE);	\
		if (condition)						\
			break;						\
		spin_unlock_irq(&(lock));				\
		cmd;							\
		schedule();						\
		spin_lock_irq(&(lock));					\
	}								\
	finish_wait(&(wq), &__wait);					\
} while (0)
|
|
|
|
|
|
|
|
/**
|
|
|
|
* wait_event_lock_irq_cmd - sleep until a condition gets true. The
|
|
|
|
* condition is checked under the lock. This
|
|
|
|
* is expected to be called with the lock
|
|
|
|
* taken.
|
|
|
|
* @wq: the waitqueue to wait on
|
|
|
|
* @condition: a C expression for the event to wait for
|
|
|
|
* @lock: a locked spinlock_t, which will be released before cmd
|
|
|
|
* and schedule() and reacquired afterwards.
|
|
|
|
* @cmd: a command which is invoked outside the critical section before
|
|
|
|
* sleep
|
|
|
|
*
|
|
|
|
* The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
|
|
|
|
* @condition evaluates to true. The @condition is checked each time
|
|
|
|
* the waitqueue @wq is woken up.
|
|
|
|
*
|
|
|
|
* wake_up() has to be called after changing any variable that could
|
|
|
|
* change the result of the wait condition.
|
|
|
|
*
|
|
|
|
* This is supposed to be called while holding the lock. The lock is
|
|
|
|
* dropped before invoking the cmd and going to sleep and is reacquired
|
|
|
|
* afterwards.
|
|
|
|
*/
|
|
|
|
#define wait_event_lock_irq_cmd(wq, condition, lock, cmd)		\
do {									\
	/* only take the sleeping slow path if not already true */	\
	if (!(condition))						\
		__wait_event_lock_irq(wq, condition, lock, cmd);	\
} while (0)
|
|
|
|
|
|
|
|
/**
|
|
|
|
* wait_event_lock_irq - sleep until a condition gets true. The
|
|
|
|
* condition is checked under the lock. This
|
|
|
|
* is expected to be called with the lock
|
|
|
|
* taken.
|
|
|
|
* @wq: the waitqueue to wait on
|
|
|
|
* @condition: a C expression for the event to wait for
|
|
|
|
* @lock: a locked spinlock_t, which will be released before schedule()
|
|
|
|
* and reacquired afterwards.
|
|
|
|
*
|
|
|
|
* The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
|
|
|
|
* @condition evaluates to true. The @condition is checked each time
|
|
|
|
* the waitqueue @wq is woken up.
|
|
|
|
*
|
|
|
|
* wake_up() has to be called after changing any variable that could
|
|
|
|
* change the result of the wait condition.
|
|
|
|
*
|
|
|
|
* This is supposed to be called while holding the lock. The lock is
|
|
|
|
* dropped before going to sleep and is reacquired afterwards.
|
|
|
|
*/
|
|
|
|
#define wait_event_lock_irq(wq, condition, lock)			\
do {									\
	/* same as the _cmd variant, with an empty command */		\
	if (!(condition))						\
		__wait_event_lock_irq(wq, condition, lock, );		\
} while (0)
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * __wait_event_interruptible_lock_irq - slow path of
 * wait_event_interruptible_lock_irq{,_cmd}()
 *
 * Entered with @lock held.  @condition and signal_pending() are tested
 * while the lock is held; the lock is released with spin_unlock_irq()
 * before @cmd and schedule() run and retaken with spin_lock_irq()
 * afterwards.  -ERESTARTSYS is stored in the lvalue @ret when a signal is
 * pending.  @wq, @lock and @ret are parenthesized so compound expressions
 * expand safely.
 */
#define __wait_event_interruptible_lock_irq(wq, condition,		\
					    lock, ret, cmd)		\
do {									\
	DEFINE_WAIT(__wait);						\
									\
	for (;;) {							\
		prepare_to_wait(&(wq), &__wait, TASK_INTERRUPTIBLE);	\
		if (condition)						\
			break;						\
		if (signal_pending(current)) {				\
			(ret) = -ERESTARTSYS;				\
			break;						\
		}							\
		spin_unlock_irq(&(lock));				\
		cmd;							\
		schedule();						\
		spin_lock_irq(&(lock));					\
	}								\
	finish_wait(&(wq), &__wait);					\
} while (0)
|
|
|
|
|
|
|
|
/**
|
|
|
|
* wait_event_interruptible_lock_irq_cmd - sleep until a condition gets true.
|
|
|
|
* The condition is checked under the lock. This is expected to
|
|
|
|
* be called with the lock taken.
|
|
|
|
* @wq: the waitqueue to wait on
|
|
|
|
* @condition: a C expression for the event to wait for
|
|
|
|
* @lock: a locked spinlock_t, which will be released before cmd and
|
|
|
|
* schedule() and reacquired afterwards.
|
|
|
|
* @cmd: a command which is invoked outside the critical section before
|
|
|
|
* sleep
|
|
|
|
*
|
|
|
|
* The process is put to sleep (TASK_INTERRUPTIBLE) until the
|
|
|
|
* @condition evaluates to true or a signal is received. The @condition is
|
|
|
|
* checked each time the waitqueue @wq is woken up.
|
|
|
|
*
|
|
|
|
* wake_up() has to be called after changing any variable that could
|
|
|
|
* change the result of the wait condition.
|
|
|
|
*
|
|
|
|
* This is supposed to be called while holding the lock. The lock is
|
|
|
|
* dropped before invoking the cmd and going to sleep and is reacquired
|
|
|
|
* afterwards.
|
|
|
|
*
|
|
|
|
* The macro will return -ERESTARTSYS if it was interrupted by a signal
|
|
|
|
* and 0 if @condition evaluated to true.
|
|
|
|
*/
|
|
|
|
#define wait_event_interruptible_lock_irq_cmd(wq, condition, lock, cmd)	\
({									\
	int __ret = 0;	/* stays 0 unless the slow path stores an error */ \
									\
	if (!(condition)) {						\
		__wait_event_interruptible_lock_irq(wq, condition,	\
						    lock, __ret, cmd);	\
	}								\
	__ret;								\
})
|
|
|
|
|
|
|
|
/**
|
|
|
|
* wait_event_interruptible_lock_irq - sleep until a condition gets true.
|
|
|
|
* The condition is checked under the lock. This is expected
|
|
|
|
* to be called with the lock taken.
|
|
|
|
* @wq: the waitqueue to wait on
|
|
|
|
* @condition: a C expression for the event to wait for
|
|
|
|
* @lock: a locked spinlock_t, which will be released before schedule()
|
|
|
|
* and reacquired afterwards.
|
|
|
|
*
|
|
|
|
* The process is put to sleep (TASK_INTERRUPTIBLE) until the
|
|
|
|
* @condition evaluates to true or signal is received. The @condition is
|
|
|
|
* checked each time the waitqueue @wq is woken up.
|
|
|
|
*
|
|
|
|
* wake_up() has to be called after changing any variable that could
|
|
|
|
* change the result of the wait condition.
|
|
|
|
*
|
|
|
|
* This is supposed to be called while holding the lock. The lock is
|
|
|
|
* dropped before going to sleep and is reacquired afterwards.
|
|
|
|
*
|
|
|
|
* The macro will return -ERESTARTSYS if it was interrupted by a signal
|
|
|
|
* and 0 if @condition evaluated to true.
|
|
|
|
*/
|
|
|
|
#define wait_event_interruptible_lock_irq(wq, condition, lock)		\
({									\
	int __ret = 0;	/* stays 0 unless the slow path stores an error */ \
									\
	if (!(condition)) {						\
		/* same as the _cmd variant, with an empty command */	\
		__wait_event_interruptible_lock_irq(wq, condition,	\
						    lock, __ret, );	\
	}								\
	__ret;								\
})
|
|
|
|
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/*
|
|
|
|
* These are the old interfaces to sleep waiting for an event.
|
2007-07-09 23:52:01 +07:00
|
|
|
* They are racy. DO NOT use them, use the wait_event* interfaces above.
|
|
|
|
* We plan to remove these interfaces.
|
2005-04-17 05:20:36 +07:00
|
|
|
*/
|
2007-07-09 23:52:01 +07:00
|
|
|
/* Declarations only; the definitions live outside this header. */
void sleep_on(wait_queue_head_t *q);
long sleep_on_timeout(wait_queue_head_t *q, long timeout);
void interruptible_sleep_on(wait_queue_head_t *q);
long interruptible_sleep_on_timeout(wait_queue_head_t *q, long timeout);
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Waitqueues which are removed from the waitqueue_head at wakeup time
|
|
|
|
*/
|
2008-02-14 06:03:15 +07:00
|
|
|
/* Called at the top of every iteration of the wait_event*() loops. */
void prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state);
void prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait,
			       int state);
/* Called by the wait_event*() macros once their wait loop is done. */
void finish_wait(wait_queue_head_t *q, wait_queue_t *wait);
/* Used by the exclusive interruptible wait when a signal is pending. */
void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait,
			  unsigned int mode, void *key);
/* Wake callback installed by DEFINE_WAIT() and init_wait(). */
int autoremove_wake_function(wait_queue_t *wait, unsigned int mode, int sync,
			     void *key);
/* Wake callback installed by DEFINE_WAIT_BIT(). */
int wake_bit_function(wait_queue_t *wait, unsigned int mode, int sync,
		      void *key);
|
|
|
|
|
2009-04-28 16:24:21 +07:00
|
|
|
/*
 * DEFINE_WAIT_FUNC - define a wait_queue_t called @name that belongs to
 * the current task, uses @function as its wake callback and starts with
 * an empty (self-linked) task_list.  Members not listed are zero.
 */
#define DEFINE_WAIT_FUNC(name, function)				\
	wait_queue_t name = {						\
		.task_list	= LIST_HEAD_INIT((name).task_list),	\
		.func		= function,				\
		.private	= current,				\
	}

/* DEFINE_WAIT - DEFINE_WAIT_FUNC() with autoremove_wake_function. */
#define DEFINE_WAIT(name) DEFINE_WAIT_FUNC(name, autoremove_wake_function)
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/*
 * DEFINE_WAIT_BIT - define a struct wait_bit_queue called @name keyed on
 * (@word, @bit), whose embedded wait entry belongs to the current task
 * and uses wake_bit_function as its wake callback.
 */
#define DEFINE_WAIT_BIT(name, word, bit)				\
	struct wait_bit_queue name = {					\
		.key		 = __WAIT_BIT_KEY_INITIALIZER(word, bit), \
		.wait.private	 = current,				\
		.wait.func	 = wake_bit_function,			\
		.wait.task_list	 =					\
			LIST_HEAD_INIT((name).wait.task_list),		\
	}
|
|
|
|
|
|
|
|
/*
 * init_wait - run-time counterpart of DEFINE_WAIT(): clear the flags,
 * bind the entry to the current task, install autoremove_wake_function
 * and reset task_list.  Note that @wait is evaluated several times.
 */
#define init_wait(wait)							\
	do {								\
		(wait)->flags = 0;					\
		(wait)->private = current;				\
		(wait)->func = autoremove_wake_function;		\
		INIT_LIST_HEAD(&(wait)->task_list);			\
	} while (0)
|
|
|
|
|
|
|
|
/**
|
|
|
|
* wait_on_bit - wait for a bit to be cleared
|
|
|
|
* @word: the word being waited on, a kernel virtual address
|
|
|
|
* @bit: the bit of the word being waited on
|
|
|
|
* @action: the function used to sleep, which may take special actions
|
|
|
|
* @mode: the task state to sleep in
|
|
|
|
*
|
|
|
|
* There is a standard hashed waitqueue table for generic use. This
|
|
|
|
* is the part of the hashtable's accessor API that waits on a bit.
|
|
|
|
* For instance, if one were to have waiters on a bitflag, one would
|
|
|
|
* call wait_on_bit() in threads waiting for the bit to clear.
|
|
|
|
* One uses wait_on_bit() where one is waiting for the bit to clear,
|
|
|
|
* but has no intention of setting it.
|
|
|
|
*/
|
|
|
|
static inline int wait_on_bit(void *word, int bit,
			      int (*action)(void *), unsigned mode)
{
	/* Bit still set: hand over to the out-of-line waiting path. */
	if (test_bit(bit, word))
		return out_of_line_wait_on_bit(word, bit, action, mode);

	/* Bit already clear, nothing to wait for. */
	return 0;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* wait_on_bit_lock - wait for a bit to be cleared, when wanting to set it
|
|
|
|
* @word: the word being waited on, a kernel virtual address
|
|
|
|
* @bit: the bit of the word being waited on
|
|
|
|
* @action: the function used to sleep, which may take special actions
|
|
|
|
* @mode: the task state to sleep in
|
|
|
|
*
|
|
|
|
* There is a standard hashed waitqueue table for generic use. This
|
|
|
|
* is the part of the hashtable's accessor API that waits on a bit
|
|
|
|
* when one intends to set it, for instance, trying to lock bitflags.
|
|
|
|
* For instance, if one were to have waiters trying to set bitflag
|
|
|
|
* and waiting for it to clear before setting it, one would call
|
|
|
|
* wait_on_bit_lock() in threads waiting to be able to set the bit.
|
|
|
|
* One uses wait_on_bit_lock() where one is waiting for the bit to
|
|
|
|
* clear with the intention of setting it, and when done, clearing it.
|
|
|
|
*/
|
|
|
|
static inline int wait_on_bit_lock(void *word, int bit,
				   int (*action)(void *), unsigned mode)
{
	/* Bit was already set by someone else: wait out of line for it. */
	if (test_and_set_bit(bit, word))
		return out_of_line_wait_on_bit_lock(word, bit, action, mode);

	/* The bit was clear and test_and_set_bit() has just set it. */
	return 0;
}
|
Add wait_on_atomic_t() and wake_up_atomic_t()
Add wait_on_atomic_t() and wake_up_atomic_t() to indicate became-zero events on
atomic_t types. This uses the bit-wake waitqueue table. The key is set to a
value outside of the number of bits in a long so that wait_on_bit() won't be
woken up accidentally.
What I'm using this for is: in a following patch I add a counter to struct
fscache_cookie to count the number of outstanding operations that need access
to netfs data. The way this works is:
(1) When a cookie is allocated, the counter is initialised to 1.
(2) When an operation wants to access netfs data, it calls atomic_inc_unless()
to increment the counter before it does so. If it was 0, then the counter
isn't incremented, the operation isn't permitted to access the netfs data
(which might by this point no longer exist) and the operation aborts in
some appropriate manner.
(3) When an operation finishes with the netfs data, it decrements the counter
and if it reaches 0, calls wake_up_atomic_t() on it - the assumption being
that it was the last blocker.
(4) When a cookie is released, the counter is decremented and the releaser
uses wait_on_atomic_t() to wait for the counter to become 0 - which should
indicate no one is using the netfs data any longer. The netfs data can
then be destroyed.
There are some alternatives that I have thought of and that have been suggested
by Tejun Heo:
(A) Using wait_on_bit() to wait on a bit in the counter. This doesn't work
because if that bit happens to be 0 then the wait won't happen - even if
the counter is non-zero.
(B) Using wait_on_bit() to wait on a flag elsewhere which is cleared when the
counter reaches 0. Such a flag would be redundant and would add
complexity.
(C) Adding a waitqueue to fscache_cookie - this would expand that struct by
several words for an event that happens just once in each cookie's
lifetime. Further, cookies are generally per-file so there are likely to
be a lot of them.
(D) Similar to (C), but add a pointer to a waitqueue in the cookie instead of
a waitqueue. This would add a single word per cookie and so would be less
of an expansion - but still an expansion.
(E) Adding a static waitqueue to the fscache module. Generally this would be
fine, but under certain circumstances many cookies will all get added at
the same time (eg. NFS umount, cache withdrawal) thereby presenting
scaling issues. Note that the wait may be significant as disk I/O may be
in progress.
So, I think reusing the wait_on_bit() waitqueue set is reasonable. I don't
make much use of the waitqueue I need on a per-cookie basis, but sometimes I
have a huge flood of the cookies to deal with.
I also don't want to add a whole new set of global waitqueue tables
specifically for the dec-to-0 event if I can reuse the bit tables.
Signed-off-by: David Howells <dhowells@redhat.com>
Tested-By: Milosz Tanski <milosz@adfin.com>
Acked-by: Jeff Layton <jlayton@redhat.com>
2013-05-11 01:50:26 +07:00
|
|
|
|
|
|
|
/**
|
|
|
|
* wait_on_atomic_t - Wait for an atomic_t to become 0
|
|
|
|
* @val: The atomic value being waited on, a kernel virtual address
|
|
|
|
* @action: the function used to sleep, which may take special actions
|
|
|
|
* @mode: the task state to sleep in
|
|
|
|
*
|
|
|
|
* Wait for an atomic_t to become 0. We abuse the bit-wait waitqueue table for
|
|
|
|
* the purpose of getting a waitqueue, but we set the key to a bit number
|
|
|
|
* outside of the target 'word'.
|
|
|
|
*/
|
|
|
|
static inline
|
|
|
|
int wait_on_atomic_t(atomic_t *val, int (*action)(atomic_t *), unsigned mode)
|
|
|
|
{
|
|
|
|
if (atomic_read(val) == 0)
|
|
|
|
return 0;
|
|
|
|
return out_of_line_wait_on_atomic_t(val, action, mode);
|
|
|
|
}
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
#endif
|