mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-24 04:47:12 +07:00
ec81048cc3
In theory, COMPLETION_INITIALIZER_ONSTACK() should never affect the stack allocation of the caller. However, on some compilers, a temporary structure was allocated for the return value of COMPLETION_INITIALIZER_ONSTACK().

For example in write_journal() with LOCKDEP_COMPLETIONS=y (GCC is 7.1.1):

	io_comp.comp = COMPLETION_INITIALIZER_ONSTACK(io_comp.comp);
    2462: e8 00 00 00 00 callq 2467 <write_journal+0x47>
    2467: 48 8d 85 80 fd ff ff lea -0x280(%rbp),%rax
    246e: 48 c7 c6 00 00 00 00 mov $0x0,%rsi
    2475: 48 c7 c2 00 00 00 00 mov $0x0,%rdx
	x->done = 0;
    247c: c7 85 90 fd ff ff 00 movl $0x0,-0x270(%rbp)
    2483: 00 00 00
	init_waitqueue_head(&x->wait);
    2486: 48 8d 78 18 lea 0x18(%rax),%rdi
    248a: e8 00 00 00 00 callq 248f <write_journal+0x6f>
	if (commit_start + commit_sections <= ic->journal_sections) {
    248f: 41 8b 87 a8 00 00 00 mov 0xa8(%r15),%eax
	io_comp.comp = COMPLETION_INITIALIZER_ONSTACK(io_comp.comp);
    2496: 48 8d bd e8 f9 ff ff lea -0x618(%rbp),%rdi
    249d: 48 8d b5 90 fd ff ff lea -0x270(%rbp),%rsi
    24a4: b9 17 00 00 00 mov $0x17,%ecx
    24a9: f3 48 a5 rep movsq %ds:(%rsi),%es:(%rdi)
	if (commit_start + commit_sections <= ic->journal_sections) {
    24ac: 41 39 c6 cmp %eax,%r14d
	io_comp.comp = COMPLETION_INITIALIZER_ONSTACK(io_comp.comp);
    24af: 48 8d bd 90 fd ff ff lea -0x270(%rbp),%rdi
    24b6: 48 8d b5 e8 f9 ff ff lea -0x618(%rbp),%rsi
    24bd: b9 17 00 00 00 mov $0x17,%ecx
    24c2: f3 48 a5 rep movsq %ds:(%rsi),%es:(%rdi)

We can obviously see the temporary structure allocated, and the compiler also does two meaningless memcpy operations with "rep movsq".

And according to:

  https://gcc.gnu.org/onlinedocs/gcc/Statement-Exprs.html#Statement-Exprs

The return value of a statement expression is returned by value, so the temporary variable is created in COMPLETION_INITIALIZER_ONSTACK(), and that's why the temporary structures are allocated.
To fix this, make the brace block in COMPLETION_INITIALIZER_ONSTACK() return a pointer and dereference it outside the block rather than returning the whole structure. This way, we are able to teach the compiler not to do the unnecessary stack allocation.

This could also reduce the stack size even if !LOCKDEP, for example in write_journal(), compiled with gcc 7.1.1, the result of command:

	objdump -d drivers/md/dm-integrity.o | ./scripts/checkstack.pl x86

before:

	0x0000246a write_journal [dm-integrity.o]: 696

after:

	0x00002b7a write_journal [dm-integrity.o]: 296

Reported-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Byungchul Park <byungchul.park@lge.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: walken@google.com
Cc: willy@infradead.org
Link: http://lkml.kernel.org/r/20170823152542.5150-3-boqun.feng@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
153 lines
4.7 KiB
C
153 lines
4.7 KiB
C
#ifndef __LINUX_COMPLETION_H
|
|
#define __LINUX_COMPLETION_H
|
|
|
|
/*
|
|
* (C) Copyright 2001 Linus Torvalds
|
|
*
|
|
* Atomic wait-for-completion handler data structures.
|
|
* See kernel/sched/completion.c for details.
|
|
*/
|
|
|
|
#include <linux/wait.h>
|
|
#ifdef CONFIG_LOCKDEP_COMPLETIONS
|
|
#include <linux/lockdep.h>
|
|
#endif
|
|
|
|
/*
|
|
* struct completion - structure used to maintain state for a "completion"
|
|
*
|
|
* This is the opaque structure used to maintain the state for a "completion".
|
|
* Completions currently use a FIFO to queue threads that have to wait for
|
|
* the "completion" event.
|
|
*
|
|
* See also: complete(), wait_for_completion() (and friends _timeout,
|
|
* _interruptible, _interruptible_timeout, and _killable), init_completion(),
|
|
* reinit_completion(), and macros DECLARE_COMPLETION(),
|
|
* DECLARE_COMPLETION_ONSTACK().
|
|
*/
|
|
struct completion {
|
|
unsigned int done;
|
|
wait_queue_head_t wait;
|
|
#ifdef CONFIG_LOCKDEP_COMPLETIONS
|
|
struct lockdep_map_cross map;
|
|
#endif
|
|
};
|
|
|
|
#ifdef CONFIG_LOCKDEP_COMPLETIONS
|
|
static inline void complete_acquire(struct completion *x)
|
|
{
|
|
lock_acquire_exclusive((struct lockdep_map *)&x->map, 0, 0, NULL, _RET_IP_);
|
|
}
|
|
|
|
static inline void complete_release(struct completion *x)
|
|
{
|
|
lock_release((struct lockdep_map *)&x->map, 0, _RET_IP_);
|
|
}
|
|
|
|
static inline void complete_release_commit(struct completion *x)
|
|
{
|
|
lock_commit_crosslock((struct lockdep_map *)&x->map);
|
|
}
|
|
|
|
/*
 * init_completion() as built with CONFIG_LOCKDEP_COMPLETIONS.
 *
 * The lock_class_key is a static declared inside the expansion, so every
 * place the macro is expanded gets a key of its own. The lockdep map is
 * initialized under the name "(complete)" followed by the stringified
 * argument, and then __init_completion() sets up the remaining fields.
 */
#define init_completion(x)						\
do {									\
	static struct lock_class_key __key;				\
									\
	lockdep_init_map_crosslock((struct lockdep_map *)&(x)->map,	\
				   "(complete)" #x, &__key, 0);		\
	__init_completion(x);						\
} while (0)
|
|
#else
|
|
/* !CONFIG_LOCKDEP_COMPLETIONS: struct completion has no lockdep map to set up. */
#define init_completion(x)	__init_completion(x)
|
|
/* !CONFIG_LOCKDEP_COMPLETIONS: empty stand-in so callers need no #ifdef. */
static inline void complete_acquire(struct completion *x)
{
}
|
|
/* !CONFIG_LOCKDEP_COMPLETIONS: empty stand-in so callers need no #ifdef. */
static inline void complete_release(struct completion *x)
{
}
|
|
/* !CONFIG_LOCKDEP_COMPLETIONS: empty stand-in so callers need no #ifdef. */
static inline void complete_release_commit(struct completion *x)
{
}
|
|
#endif
|
|
|
|
#ifdef CONFIG_LOCKDEP_COMPLETIONS
|
|
/*
 * Constant initializer for a struct completion when
 * CONFIG_LOCKDEP_COMPLETIONS is set: done starts at 0, the wait queue head
 * is statically initialized, and the lockdep map is named "(complete)"
 * followed by the stringified @work.
 */
#define COMPLETION_INITIALIZER(work)					\
{									\
	.done = 0,							\
	.wait = __WAIT_QUEUE_HEAD_INITIALIZER((work).wait),		\
	.map  = STATIC_CROSS_LOCKDEP_MAP_INIT("(complete)" #work,	\
					      &(work)),			\
}
|
|
#else
|
|
/*
 * Constant initializer for a struct completion without
 * CONFIG_LOCKDEP_COMPLETIONS: done starts at 0 and the wait queue head is
 * statically initialized.
 */
#define COMPLETION_INITIALIZER(work)					\
{									\
	.done = 0,							\
	.wait = __WAIT_QUEUE_HEAD_INITIALIZER((work).wait),		\
}
|
|
#endif
|
|
|
|
/*
 * Run-time initializer for a completion that lives on the stack.
 *
 * The statement expression evaluates to a pointer to @work and the
 * dereference happens outside the braces. A statement expression hands its
 * value back by value, so yielding the structure itself would let the
 * compiler materialise a temporary struct completion on the caller's stack.
 */
#define COMPLETION_INITIALIZER_ONSTACK(work)				\
	(*({								\
		init_completion(&work);					\
		&work;							\
	}))
|
|
|
|
/**
 * DECLARE_COMPLETION - declare and initialize a completion structure
 * @work:  identifier for the completion structure
 *
 * Defines a struct completion named @work and initializes it with
 * COMPLETION_INITIALIZER(). Generally used for static declarations; for
 * automatic variables use the _ONSTACK variant instead.
 */
#define DECLARE_COMPLETION(work)					\
	struct completion work = COMPLETION_INITIALIZER(work)
|
|
|
|
/**
 * DECLARE_COMPLETION_ONSTACK - declare and initialize a completion structure
 * @work:  identifier for the completion structure
 *
 * This macro declares and initializes a completion structure on the kernel
 * stack.
 *
 * Lockdep needs to run a non-constant initializer for on-stack completions,
 * so with CONFIG_LOCKDEP the declaration is initialized through
 * COMPLETION_INITIALIZER_ONSTACK(); without it this is DECLARE_COMPLETION().
 */
#ifdef CONFIG_LOCKDEP
#define DECLARE_COMPLETION_ONSTACK(work)				\
	struct completion work = COMPLETION_INITIALIZER_ONSTACK(work)
#else
#define DECLARE_COMPLETION_ONSTACK(work)	DECLARE_COMPLETION(work)
#endif
|
|
|
|
/**
|
|
* init_completion - Initialize a dynamically allocated completion
|
|
* @x: pointer to completion structure that is to be initialized
|
|
*
|
|
* This inline function will initialize a dynamically created completion
|
|
* structure.
|
|
*/
|
|
static inline void __init_completion(struct completion *x)
|
|
{
|
|
x->done = 0;
|
|
init_waitqueue_head(&x->wait);
|
|
}
|
|
|
|
/**
|
|
* reinit_completion - reinitialize a completion structure
|
|
* @x: pointer to completion structure that is to be reinitialized
|
|
*
|
|
* This inline function should be used to reinitialize a completion structure so it can
|
|
* be reused. This is especially important after complete_all() is used.
|
|
*/
|
|
static inline void reinit_completion(struct completion *x)
|
|
{
|
|
x->done = 0;
|
|
}
|
|
|
|
/*
 * Out-of-line completion API. Only the prototypes are visible here; the
 * comment at the top of this header points to kernel/sched/completion.c for
 * details. Every prototype names its completion parameter @x.
 */
extern void wait_for_completion(struct completion *x);
extern void wait_for_completion_io(struct completion *x);
extern int wait_for_completion_interruptible(struct completion *x);
extern int wait_for_completion_killable(struct completion *x);
extern unsigned long wait_for_completion_timeout(struct completion *x,
						 unsigned long timeout);
extern unsigned long wait_for_completion_io_timeout(struct completion *x,
						    unsigned long timeout);
extern long wait_for_completion_interruptible_timeout(
	struct completion *x, unsigned long timeout);
extern long wait_for_completion_killable_timeout(
	struct completion *x, unsigned long timeout);
extern bool try_wait_for_completion(struct completion *x);
extern bool completion_done(struct completion *x);

extern void complete(struct completion *x);
extern void complete_all(struct completion *x);
|
|
|
|
#endif
|