linux_dsm_epyc7002/include/linux/workqueue.h

411 lines
12 KiB
C
Raw Normal View History

/*
* workqueue.h --- work queue handling for Linux.
*/
#ifndef _LINUX_WORKQUEUE_H
#define _LINUX_WORKQUEUE_H
#include <linux/timer.h>
#include <linux/linkage.h>
#include <linux/bitops.h>
#include <linux/lockdep.h>
#include <linux/threads.h>
#include <asm/atomic.h>
struct workqueue_struct;
2006-11-22 21:55:48 +07:00
struct work_struct;
typedef void (*work_func_t)(struct work_struct *work);
/*
* The first word is the work queue pointer and the flags rolled into
* one
*/
#define work_data_bits(work) ((unsigned long *)(&(work)->data))
enum {
WORK_STRUCT_PENDING_BIT = 0, /* work item is pending execution */
WORK_STRUCT_CWQ_BIT = 1, /* data points to cwq */
WORK_STRUCT_LINKED_BIT = 2, /* next work is linked to this one */
#ifdef CONFIG_DEBUG_OBJECTS_WORK
WORK_STRUCT_STATIC_BIT = 3, /* static initializer (debugobjects) */
WORK_STRUCT_COLOR_SHIFT = 4, /* color for workqueue flushing */
#else
WORK_STRUCT_COLOR_SHIFT = 3, /* color for workqueue flushing */
#endif
workqueue: reimplement workqueue flushing using color coded works Reimplement workqueue flushing using color coded works. wq has the current work color which is painted on the works being issued via cwqs. Flushing a workqueue is achieved by advancing the current work colors of cwqs and waiting for all the works which have any of the previous colors to drain. Currently there are 16 possible colors, one is reserved for no color and 15 colors are useable allowing 14 concurrent flushes. When color space gets full, flush attempts are batched up and processed together when color frees up, so even with many concurrent flushers, the new implementation won't build up huge queue of flushers which has to be processed one after another. Only works which are queued via __queue_work() are colored. Works which are directly put on queue using insert_work() use NO_COLOR and don't participate in workqueue flushing. Currently only works used for work-specific flush fall in this category. This new implementation leaves only cleanup_workqueue_thread() as the user of flush_cpu_workqueue(). Just make its users use flush_workqueue() and kthread_stop() directly and kill cleanup_workqueue_thread(). As workqueue flushing doesn't use barrier request anymore, the comment describing the complex synchronization around it in cleanup_workqueue_thread() is removed together with the function. This new implementation is to allow having and sharing multiple workers per cpu. Please note that one more bit is reserved for a future work flag by this patch. This is to avoid shifting bits and updating comments later. Signed-off-by: Tejun Heo <tj@kernel.org>
2010-06-29 15:07:11 +07:00
WORK_STRUCT_COLOR_BITS = 4,
WORK_STRUCT_PENDING = 1 << WORK_STRUCT_PENDING_BIT,
WORK_STRUCT_CWQ = 1 << WORK_STRUCT_CWQ_BIT,
WORK_STRUCT_LINKED = 1 << WORK_STRUCT_LINKED_BIT,
#ifdef CONFIG_DEBUG_OBJECTS_WORK
WORK_STRUCT_STATIC = 1 << WORK_STRUCT_STATIC_BIT,
#else
WORK_STRUCT_STATIC = 0,
#endif
workqueue: reimplement workqueue flushing using color coded works Reimplement workqueue flushing using color coded works. wq has the current work color which is painted on the works being issued via cwqs. Flushing a workqueue is achieved by advancing the current work colors of cwqs and waiting for all the works which have any of the previous colors to drain. Currently there are 16 possible colors, one is reserved for no color and 15 colors are useable allowing 14 concurrent flushes. When color space gets full, flush attempts are batched up and processed together when color frees up, so even with many concurrent flushers, the new implementation won't build up huge queue of flushers which has to be processed one after another. Only works which are queued via __queue_work() are colored. Works which are directly put on queue using insert_work() use NO_COLOR and don't participate in workqueue flushing. Currently only works used for work-specific flush fall in this category. This new implementation leaves only cleanup_workqueue_thread() as the user of flush_cpu_workqueue(). Just make its users use flush_workqueue() and kthread_stop() directly and kill cleanup_workqueue_thread(). As workqueue flushing doesn't use barrier request anymore, the comment describing the complex synchronization around it in cleanup_workqueue_thread() is removed together with the function. This new implementation is to allow having and sharing multiple workers per cpu. Please note that one more bit is reserved for a future work flag by this patch. This is to avoid shifting bits and updating comments later. Signed-off-by: Tejun Heo <tj@kernel.org>
2010-06-29 15:07:11 +07:00
/*
* The last color is no color used for works which don't
* participate in workqueue flushing.
*/
WORK_NR_COLORS = (1 << WORK_STRUCT_COLOR_BITS) - 1,
WORK_NO_COLOR = WORK_NR_COLORS,
/* special cpu IDs */
WORK_CPU_UNBOUND = NR_CPUS,
WORK_CPU_NONE = NR_CPUS + 1,
WORK_CPU_LAST = WORK_CPU_NONE,
workqueue: reimplement workqueue flushing using color coded works Reimplement workqueue flushing using color coded works. wq has the current work color which is painted on the works being issued via cwqs. Flushing a workqueue is achieved by advancing the current work colors of cwqs and waiting for all the works which have any of the previous colors to drain. Currently there are 16 possible colors, one is reserved for no color and 15 colors are useable allowing 14 concurrent flushes. When color space gets full, flush attempts are batched up and processed together when color frees up, so even with many concurrent flushers, the new implementation won't build up huge queue of flushers which has to be processed one after another. Only works which are queued via __queue_work() are colored. Works which are directly put on queue using insert_work() use NO_COLOR and don't participate in workqueue flushing. Currently only works used for work-specific flush fall in this category. This new implementation leaves only cleanup_workqueue_thread() as the user of flush_cpu_workqueue(). Just make its users use flush_workqueue() and kthread_stop() directly and kill cleanup_workqueue_thread(). As workqueue flushing doesn't use barrier request anymore, the comment describing the complex synchronization around it in cleanup_workqueue_thread() is removed together with the function. This new implementation is to allow having and sharing multiple workers per cpu. Please note that one more bit is reserved for a future work flag by this patch. This is to avoid shifting bits and updating comments later. Signed-off-by: Tejun Heo <tj@kernel.org>
2010-06-29 15:07:11 +07:00
/*
* Reserve 7 bits off of cwq pointer w/ debugobjects turned
* off. This makes cwqs aligned to 128 bytes which isn't too
workqueue: reimplement workqueue flushing using color coded works Reimplement workqueue flushing using color coded works. wq has the current work color which is painted on the works being issued via cwqs. Flushing a workqueue is achieved by advancing the current work colors of cwqs and waiting for all the works which have any of the previous colors to drain. Currently there are 16 possible colors, one is reserved for no color and 15 colors are useable allowing 14 concurrent flushes. When color space gets full, flush attempts are batched up and processed together when color frees up, so even with many concurrent flushers, the new implementation won't build up huge queue of flushers which has to be processed one after another. Only works which are queued via __queue_work() are colored. Works which are directly put on queue using insert_work() use NO_COLOR and don't participate in workqueue flushing. Currently only works used for work-specific flush fall in this category. This new implementation leaves only cleanup_workqueue_thread() as the user of flush_cpu_workqueue(). Just make its users use flush_workqueue() and kthread_stop() directly and kill cleanup_workqueue_thread(). As workqueue flushing doesn't use barrier request anymore, the comment describing the complex synchronization around it in cleanup_workqueue_thread() is removed together with the function. This new implementation is to allow having and sharing multiple workers per cpu. Please note that one more bit is reserved for a future work flag by this patch. This is to avoid shifting bits and updating comments later. Signed-off-by: Tejun Heo <tj@kernel.org>
2010-06-29 15:07:11 +07:00
* excessive while allowing 15 workqueue flush colors.
*/
WORK_STRUCT_FLAG_BITS = WORK_STRUCT_COLOR_SHIFT +
WORK_STRUCT_COLOR_BITS,
WORK_STRUCT_FLAG_MASK = (1UL << WORK_STRUCT_FLAG_BITS) - 1,
WORK_STRUCT_WQ_DATA_MASK = ~WORK_STRUCT_FLAG_MASK,
WORK_STRUCT_NO_CPU = WORK_CPU_NONE << WORK_STRUCT_FLAG_BITS,
/* bit mask for work_busy() return values */
WORK_BUSY_PENDING = 1 << 0,
WORK_BUSY_RUNNING = 1 << 1,
};
struct work_struct {
atomic_long_t data;
struct list_head entry;
work_func_t func;
#ifdef CONFIG_LOCKDEP
struct lockdep_map lockdep_map;
#endif
};
#define WORK_DATA_INIT() ATOMIC_LONG_INIT(WORK_STRUCT_NO_CPU)
#define WORK_DATA_STATIC_INIT() \
ATOMIC_LONG_INIT(WORK_STRUCT_NO_CPU | WORK_STRUCT_STATIC)
struct delayed_work {
struct work_struct work;
struct timer_list timer;
};
static inline struct delayed_work *to_delayed_work(struct work_struct *work)
{
return container_of(work, struct delayed_work, work);
}
struct execute_work {
struct work_struct work;
};
#ifdef CONFIG_LOCKDEP
/*
* NB: because we have to copy the lockdep_map, setting _key
* here is required, otherwise it could get initialised to the
* copy of the lockdep_map!
*/
#define __WORK_INIT_LOCKDEP_MAP(n, k) \
.lockdep_map = STATIC_LOCKDEP_MAP_INIT(n, k),
#else
#define __WORK_INIT_LOCKDEP_MAP(n, k)
#endif
2006-11-22 21:55:48 +07:00
#define __WORK_INITIALIZER(n, f) { \
.data = WORK_DATA_STATIC_INIT(), \
.entry = { &(n).entry, &(n).entry }, \
2006-11-22 21:55:48 +07:00
.func = (f), \
__WORK_INIT_LOCKDEP_MAP(#n, &(n)) \
2006-11-22 21:55:48 +07:00
}
#define __DELAYED_WORK_INITIALIZER(n, f) { \
.work = __WORK_INITIALIZER((n).work, (f)), \
.timer = TIMER_INITIALIZER(NULL, 0, 0), \
}
#define DECLARE_WORK(n, f) \
struct work_struct n = __WORK_INITIALIZER(n, f)
#define DECLARE_DELAYED_WORK(n, f) \
struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f)
/*
2006-11-22 21:55:48 +07:00
* initialize a work item's function pointer
*/
2006-11-22 21:55:48 +07:00
#define PREPARE_WORK(_work, _func) \
do { \
(_work)->func = (_func); \
} while (0)
2006-11-22 21:55:48 +07:00
#define PREPARE_DELAYED_WORK(_work, _func) \
PREPARE_WORK(&(_work)->work, (_func))
#ifdef CONFIG_DEBUG_OBJECTS_WORK
extern void __init_work(struct work_struct *work, int onstack);
extern void destroy_work_on_stack(struct work_struct *work);
static inline unsigned int work_static(struct work_struct *work)
{
return *work_data_bits(work) & WORK_STRUCT_STATIC;
}
#else
static inline void __init_work(struct work_struct *work, int onstack) { }
static inline void destroy_work_on_stack(struct work_struct *work) { }
static inline unsigned int work_static(struct work_struct *work) { return 0; }
#endif
/*
* initialize all of a work item in one go
*
* NOTE! No point in using "atomic_long_set()": using a direct
* assignment of the work data initializer allows the compiler
* to generate better code.
*/
#ifdef CONFIG_LOCKDEP
#define __INIT_WORK(_work, _func, _onstack) \
2006-11-22 21:55:48 +07:00
do { \
static struct lock_class_key __key; \
\
__init_work((_work), _onstack); \
(_work)->data = (atomic_long_t) WORK_DATA_INIT(); \
lockdep_init_map(&(_work)->lockdep_map, #_work, &__key, 0);\
2006-11-22 21:55:48 +07:00
INIT_LIST_HEAD(&(_work)->entry); \
PREPARE_WORK((_work), (_func)); \
} while (0)
#else
#define __INIT_WORK(_work, _func, _onstack) \
do { \
__init_work((_work), _onstack); \
(_work)->data = (atomic_long_t) WORK_DATA_INIT(); \
INIT_LIST_HEAD(&(_work)->entry); \
PREPARE_WORK((_work), (_func)); \
} while (0)
#endif
2006-11-22 21:55:48 +07:00
#define INIT_WORK(_work, _func) \
do { \
__INIT_WORK((_work), (_func), 0); \
} while (0)
#define INIT_WORK_ON_STACK(_work, _func) \
do { \
__INIT_WORK((_work), (_func), 1); \
} while (0)
2006-11-22 21:55:48 +07:00
#define INIT_DELAYED_WORK(_work, _func) \
do { \
INIT_WORK(&(_work)->work, (_func)); \
init_timer(&(_work)->timer); \
} while (0)
#define INIT_DELAYED_WORK_ON_STACK(_work, _func) \
do { \
INIT_WORK_ON_STACK(&(_work)->work, (_func)); \
init_timer_on_stack(&(_work)->timer); \
} while (0)
#define INIT_DELAYED_WORK_DEFERRABLE(_work, _func) \
do { \
INIT_WORK(&(_work)->work, (_func)); \
init_timer_deferrable(&(_work)->timer); \
} while (0)
/**
* work_pending - Find out whether a work item is currently pending
* @work: The work item in question
*/
#define work_pending(work) \
test_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))
/**
* delayed_work_pending - Find out whether a delayable work item is currently
* pending
* @work: The work item in question
*/
#define delayed_work_pending(w) \
work_pending(&(w)->work)
2006-11-22 21:55:48 +07:00
/**
* work_clear_pending - for internal use only, mark a work item as not pending
* @work: The work item in question
2006-11-22 21:55:48 +07:00
*/
#define work_clear_pending(work) \
clear_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))
2006-11-22 21:55:48 +07:00
enum {
WQ_NON_REENTRANT = 1 << 0, /* guarantee non-reentrance */
WQ_UNBOUND = 1 << 1, /* not bound to any cpu */
WQ_FREEZEABLE = 1 << 2, /* freeze during suspend */
workqueue: implement concurrency managed dynamic worker pool Instead of creating a worker for each cwq and putting it into the shared pool, manage per-cpu workers dynamically. Works aren't supposed to be cpu cycle hogs and maintaining just enough concurrency to prevent work processing from stalling due to lack of processing context is optimal. gcwq keeps the number of concurrent active workers to minimum but no less. As long as there's one or more running workers on the cpu, no new worker is scheduled so that works can be processed in batch as much as possible but when the last running worker blocks, gcwq immediately schedules new worker so that the cpu doesn't sit idle while there are works to be processed. gcwq always keeps at least single idle worker around. When a new worker is necessary and the worker is the last idle one, the worker assumes the role of "manager" and manages the worker pool - ie. creates another worker. Forward-progress is guaranteed by having dedicated rescue workers for workqueues which may be necessary while creating a new worker. When the manager is having problem creating a new worker, mayday timer activates and rescue workers are summoned to the cpu and execute works which might be necessary to create new workers. Trustee is expanded to serve the role of manager while a CPU is being taken down and stays down. As no new works are supposed to be queued on a dead cpu, it just needs to drain all the existing ones. Trustee continues to try to create new workers and summon rescuers as long as there are pending works. If the CPU is brought back up while the trustee is still trying to drain the gcwq from the previous offlining, the trustee will kill all idles ones and tell workers which are still busy to rebind to the cpu, and pass control over to gcwq which assumes the manager role as necessary. Concurrency managed worker pool reduces the number of workers drastically. Only workers which are necessary to keep the processing going are created and kept. Also, it reduces cache footprint by avoiding unnecessarily switching contexts between different workers. Please note that this patch does not increase max_active of any workqueue. All workqueues can still only process one work per cpu. Signed-off-by: Tejun Heo <tj@kernel.org>
2010-06-29 15:07:14 +07:00
WQ_RESCUER = 1 << 3, /* has an rescue worker */
WQ_HIGHPRI = 1 << 4, /* high priority */
WQ_CPU_INTENSIVE = 1 << 5, /* cpu instensive workqueue */
WQ_DYING = 1 << 6, /* internal: workqueue is dying */
WQ_MAX_ACTIVE = 512, /* I like 512, better ideas? */
WQ_MAX_UNBOUND_PER_CPU = 4, /* 4 * #cpus for unbound wq */
WQ_DFL_ACTIVE = WQ_MAX_ACTIVE / 2,
};
/* unbound wq's aren't per-cpu, scale max_active according to #cpus */
#define WQ_UNBOUND_MAX_ACTIVE \
max_t(int, WQ_MAX_ACTIVE, num_possible_cpus() * WQ_MAX_UNBOUND_PER_CPU)
2006-11-22 21:55:48 +07:00
/*
* System-wide workqueues which are always present.
*
* system_wq is the one used by schedule[_delayed]_work[_on]().
* Multi-CPU multi-threaded. There are users which expect relatively
* short queue flush time. Don't queue works which can run for too
* long.
*
* system_long_wq is similar to system_wq but may host long running
* works. Queue flushing might take relatively long.
*
* system_nrt_wq is non-reentrant and guarantees that any given work
* item is never executed in parallel by multiple CPUs. Queue
* flushing might take relatively long.
*
* system_unbound_wq is unbound workqueue. Workers are not bound to
* any specific CPU, not concurrency managed, and all queued works are
* executed immediately as long as max_active limit is not reached and
* resources are available.
*/
extern struct workqueue_struct *system_wq;
extern struct workqueue_struct *system_long_wq;
extern struct workqueue_struct *system_nrt_wq;
extern struct workqueue_struct *system_unbound_wq;
extern struct workqueue_struct *
__alloc_workqueue_key(const char *name, unsigned int flags, int max_active,
struct lock_class_key *key, const char *lock_name);
#ifdef CONFIG_LOCKDEP
#define alloc_workqueue(name, flags, max_active) \
({ \
static struct lock_class_key __key; \
const char *__lock_name; \
\
if (__builtin_constant_p(name)) \
__lock_name = (name); \
else \
__lock_name = #name; \
\
__alloc_workqueue_key((name), (flags), (max_active), \
&__key, __lock_name); \
})
#else
#define alloc_workqueue(name, flags, max_active) \
__alloc_workqueue_key((name), (flags), (max_active), NULL, NULL)
#endif
#define create_workqueue(name) \
alloc_workqueue((name), WQ_RESCUER, 1)
#define create_freezeable_workqueue(name) \
alloc_workqueue((name), WQ_FREEZEABLE | WQ_UNBOUND | WQ_RESCUER, 1)
#define create_singlethread_workqueue(name) \
alloc_workqueue((name), WQ_UNBOUND | WQ_RESCUER, 1)
extern void destroy_workqueue(struct workqueue_struct *wq);
extern int queue_work(struct workqueue_struct *wq, struct work_struct *work);
extern int queue_work_on(int cpu, struct workqueue_struct *wq,
struct work_struct *work);
extern int queue_delayed_work(struct workqueue_struct *wq,
struct delayed_work *work, unsigned long delay);
extern int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
struct delayed_work *work, unsigned long delay);
extern void flush_workqueue(struct workqueue_struct *wq);
extern void flush_scheduled_work(void);
extern void flush_delayed_work(struct delayed_work *work);
extern int schedule_work(struct work_struct *work);
extern int schedule_work_on(int cpu, struct work_struct *work);
extern int schedule_delayed_work(struct delayed_work *work, unsigned long delay);
extern int schedule_delayed_work_on(int cpu, struct delayed_work *work,
unsigned long delay);
2006-11-22 21:55:48 +07:00
extern int schedule_on_each_cpu(work_func_t func);
extern int keventd_up(void);
2006-11-22 21:55:48 +07:00
int execute_in_process_context(work_func_t fn, struct execute_work *);
extern int flush_work(struct work_struct *work);
extern int cancel_work_sync(struct work_struct *work);
extern void workqueue_set_max_active(struct workqueue_struct *wq,
int max_active);
extern bool workqueue_congested(unsigned int cpu, struct workqueue_struct *wq);
extern unsigned int work_cpu(struct work_struct *work);
extern unsigned int work_busy(struct work_struct *work);
/*
* Kill off a pending schedule_delayed_work(). Note that the work callback
* function may still be running on return from cancel_delayed_work(), unless
* it returns 1 and the work doesn't re-arm itself. Run flush_workqueue() or
* cancel_work_sync() to wait on it.
*/
static inline int cancel_delayed_work(struct delayed_work *work)
{
int ret;
ret = del_timer_sync(&work->timer);
if (ret)
work_clear_pending(&work->work);
return ret;
}
/*
* Like above, but uses del_timer() instead of del_timer_sync(). This means,
* if it returns 0 the timer function may be running and the queueing is in
* progress.
*/
static inline int __cancel_delayed_work(struct delayed_work *work)
{
int ret;
ret = del_timer(&work->timer);
if (ret)
work_clear_pending(&work->work);
return ret;
}
extern int cancel_delayed_work_sync(struct delayed_work *work);
/* Obsolete. use cancel_delayed_work_sync() */
static inline
void cancel_rearming_delayed_workqueue(struct workqueue_struct *wq,
struct delayed_work *work)
{
cancel_delayed_work_sync(work);
}
/* Obsolete. use cancel_delayed_work_sync() */
static inline
void cancel_rearming_delayed_work(struct delayed_work *work)
{
cancel_delayed_work_sync(work);
}
#ifndef CONFIG_SMP
static inline long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
{
return fn(arg);
}
#else
long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg);
#endif /* CONFIG_SMP */
#ifdef CONFIG_FREEZER
extern void freeze_workqueues_begin(void);
extern bool freeze_workqueues_busy(void);
extern void thaw_workqueues(void);
#endif /* CONFIG_FREEZER */
#ifdef CONFIG_LOCKDEP
int in_workqueue_context(struct workqueue_struct *wq);
#endif
#endif