linux_dsm_epyc7002/include/linux/signal.h
Waiman Long c7be96af89 signals: avoid unnecessary taking of sighand->siglock
When running a certain database workload on a high-end system with many
CPUs, it was found that spinlock contention in the sigprocmask syscalls
accounted for a significant portion of the overall CPU cycles, as shown below.

  9.30%  9.30%  905387  dataserver  /proc/kcore 0x7fff8163f4d2
  [k] _raw_spin_lock_irq
            |
            ---_raw_spin_lock_irq
               |
               |--99.34%-- __set_current_blocked
               |          sigprocmask
               |          sys_rt_sigprocmask
               |          system_call_fastpath
               |          |
               |          |--50.63%-- __swapcontext
               |          |          |
               |          |          |--99.91%-- upsleepgeneric
               |          |
               |          |--49.36%-- __setcontext
               |          |          ktskRun

Looking further into the swapcontext function in glibc, it was found that
the function always calls sigprocmask() without checking whether the
signal mask actually changes.

A check was added to the __set_current_blocked() function to avoid taking
the sighand->siglock spinlock if the signal mask is unchanged. This
prevents unneeded spinlock contention when many threads call
sigprocmask() concurrently.

With this patch applied, the spinlock contention in sigprocmask() was
gone.
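
The fast path amounts to an early comparison of the old and new signal
masks before the lock is taken. A minimal sketch of the change in
kernel/signal.c (__set_task_blocked() being the existing helper that
updates the mask under the lock) looks like this:

  void __set_current_blocked(const sigset_t *newset)
  {
          struct task_struct *tsk = current;

          /* Nothing to do if the mask is unchanged; skip the lock. */
          if (sigequalsets(&tsk->blocked, newset))
                  return;

          spin_lock_irq(&tsk->sighand->siglock);
          __set_task_blocked(tsk, newset);
          spin_unlock_irq(&tsk->sighand->siglock);
  }

sigequalsets() is the helper added to <linux/signal.h> below to make this
comparison cheap.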

Link: http://lkml.kernel.org/r/1474979209-11867-1-git-send-email-Waiman.Long@hpe.com
Signed-off-by: Waiman Long <Waiman.Long@hpe.com>
Acked-by: Oleg Nesterov <oleg@redhat.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Stas Sergeev <stsp@list.ru>
Cc: Scott J Norton <scott.norton@hpe.com>
Cc: Douglas Hatch <doug.hatch@hpe.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-12-14 16:04:07 -08:00


#ifndef _LINUX_SIGNAL_H
#define _LINUX_SIGNAL_H

#include <linux/list.h>
#include <linux/bug.h>
#include <uapi/linux/signal.h>

struct task_struct;

/* for sysctl */
extern int print_fatal_signals;

/*
 * Real Time signals may be queued.
 */

struct sigqueue {
        struct list_head list;
        int flags;
        siginfo_t info;
        struct user_struct *user;
};

/* flags values. */
#define SIGQUEUE_PREALLOC 1

struct sigpending {
        struct list_head list;
        sigset_t signal;
};

#ifndef HAVE_ARCH_COPY_SIGINFO
#include <linux/string.h>

static inline void copy_siginfo(struct siginfo *to, struct siginfo *from)
{
        if (from->si_code < 0)
                memcpy(to, from, sizeof(*to));
        else
                /* _sigchld is currently the largest known union member */
                memcpy(to, from, __ARCH_SI_PREAMBLE_SIZE + sizeof(from->_sifields._sigchld));
}
#endif

/*
 * Define some primitives to manipulate sigset_t.
 */

#ifndef __HAVE_ARCH_SIG_BITOPS
#include <linux/bitops.h>

/* We don't use <linux/bitops.h> for these because there is no need to
   be atomic. */
static inline void sigaddset(sigset_t *set, int _sig)
{
        unsigned long sig = _sig - 1;
        if (_NSIG_WORDS == 1)
                set->sig[0] |= 1UL << sig;
        else
                set->sig[sig / _NSIG_BPW] |= 1UL << (sig % _NSIG_BPW);
}

static inline void sigdelset(sigset_t *set, int _sig)
{
        unsigned long sig = _sig - 1;
        if (_NSIG_WORDS == 1)
                set->sig[0] &= ~(1UL << sig);
        else
                set->sig[sig / _NSIG_BPW] &= ~(1UL << (sig % _NSIG_BPW));
}

static inline int sigismember(sigset_t *set, int _sig)
{
        unsigned long sig = _sig - 1;
        if (_NSIG_WORDS == 1)
                return 1 & (set->sig[0] >> sig);
        else
                return 1 & (set->sig[sig / _NSIG_BPW] >> (sig % _NSIG_BPW));
}

#endif /* __HAVE_ARCH_SIG_BITOPS */

static inline int sigisemptyset(sigset_t *set)
{
        switch (_NSIG_WORDS) {
        case 4:
                return (set->sig[3] | set->sig[2] |
                        set->sig[1] | set->sig[0]) == 0;
        case 2:
                return (set->sig[1] | set->sig[0]) == 0;
        case 1:
                return set->sig[0] == 0;
        default:
                BUILD_BUG();
                return 0;
        }
}

/*
 * Compare two signal sets for equality; used e.g. by __set_current_blocked()
 * to skip taking sighand->siglock when the blocked mask is unchanged.
 */
static inline int sigequalsets(const sigset_t *set1, const sigset_t *set2)
{
        switch (_NSIG_WORDS) {
        case 4:
                return (set1->sig[3] == set2->sig[3]) &&
                        (set1->sig[2] == set2->sig[2]) &&
                        (set1->sig[1] == set2->sig[1]) &&
                        (set1->sig[0] == set2->sig[0]);
        case 2:
                return (set1->sig[1] == set2->sig[1]) &&
                        (set1->sig[0] == set2->sig[0]);
        case 1:
                return set1->sig[0] == set2->sig[0];
        }
        return 0;
}

#define sigmask(sig) (1UL << ((sig) - 1))

#ifndef __HAVE_ARCH_SIG_SETOPS
#include <linux/string.h>

#define _SIG_SET_BINOP(name, op) \
static inline void name(sigset_t *r, const sigset_t *a, const sigset_t *b) \
{ \
        unsigned long a0, a1, a2, a3, b0, b1, b2, b3; \
\
        switch (_NSIG_WORDS) { \
        case 4: \
                a3 = a->sig[3]; a2 = a->sig[2]; \
                b3 = b->sig[3]; b2 = b->sig[2]; \
                r->sig[3] = op(a3, b3); \
                r->sig[2] = op(a2, b2); \
        case 2: \
                a1 = a->sig[1]; b1 = b->sig[1]; \
                r->sig[1] = op(a1, b1); \
        case 1: \
                a0 = a->sig[0]; b0 = b->sig[0]; \
                r->sig[0] = op(a0, b0); \
                break; \
        default: \
                BUILD_BUG(); \
        } \
}

#define _sig_or(x,y) ((x) | (y))
_SIG_SET_BINOP(sigorsets, _sig_or)

#define _sig_and(x,y) ((x) & (y))
_SIG_SET_BINOP(sigandsets, _sig_and)

#define _sig_andn(x,y) ((x) & ~(y))
_SIG_SET_BINOP(sigandnsets, _sig_andn)

#undef _SIG_SET_BINOP
#undef _sig_or
#undef _sig_and
#undef _sig_andn

#define _SIG_SET_OP(name, op) \
static inline void name(sigset_t *set) \
{ \
        switch (_NSIG_WORDS) { \
        case 4: set->sig[3] = op(set->sig[3]); \
                set->sig[2] = op(set->sig[2]); \
        case 2: set->sig[1] = op(set->sig[1]); \
        case 1: set->sig[0] = op(set->sig[0]); \
                break; \
        default: \
                BUILD_BUG(); \
        } \
}

#define _sig_not(x) (~(x))
_SIG_SET_OP(signotset, _sig_not)

#undef _SIG_SET_OP
#undef _sig_not

static inline void sigemptyset(sigset_t *set)
{
        switch (_NSIG_WORDS) {
        default:
                memset(set, 0, sizeof(sigset_t));
                break;
        case 2: set->sig[1] = 0;
        case 1: set->sig[0] = 0;
                break;
        }
}

static inline void sigfillset(sigset_t *set)
{
        switch (_NSIG_WORDS) {
        default:
                memset(set, -1, sizeof(sigset_t));
                break;
        case 2: set->sig[1] = -1;
        case 1: set->sig[0] = -1;
                break;
        }
}

/* Some extensions for manipulating the low 32 signals in particular. */

static inline void sigaddsetmask(sigset_t *set, unsigned long mask)
{
        set->sig[0] |= mask;
}

static inline void sigdelsetmask(sigset_t *set, unsigned long mask)
{
        set->sig[0] &= ~mask;
}

static inline int sigtestsetmask(sigset_t *set, unsigned long mask)
{
        return (set->sig[0] & mask) != 0;
}

static inline void siginitset(sigset_t *set, unsigned long mask)
{
        set->sig[0] = mask;
        switch (_NSIG_WORDS) {
        default:
                memset(&set->sig[1], 0, sizeof(long)*(_NSIG_WORDS-1));
                break;
        case 2: set->sig[1] = 0;
        case 1: ;
        }
}

static inline void siginitsetinv(sigset_t *set, unsigned long mask)
{
        set->sig[0] = ~mask;
        switch (_NSIG_WORDS) {
        default:
                memset(&set->sig[1], -1, sizeof(long)*(_NSIG_WORDS-1));
                break;
        case 2: set->sig[1] = -1;
        case 1: ;
        }
}

#endif /* __HAVE_ARCH_SIG_SETOPS */
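
/*
 * Illustrative use of the sigset_t primitives above (a hypothetical sketch
 * for documentation only, not code taken from the kernel):
 *
 *    sigset_t blocked, extra;
 *
 *    siginitset(&blocked, sigmask(SIGINT) | sigmask(SIGQUIT));
 *    sigaddset(&blocked, SIGTERM);             // SIGTERM joins the set
 *    sigemptyset(&extra);
 *    sigaddset(&extra, SIGUSR1);
 *    sigorsets(&blocked, &blocked, &extra);    // blocked |= extra
 *    if (sigismember(&blocked, SIGUSR1))
 *            handle_it();                      // hypothetical helper
 */
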
static inline void init_sigpending(struct sigpending *sig)
{
        sigemptyset(&sig->signal);
        INIT_LIST_HEAD(&sig->list);
}

extern void flush_sigqueue(struct sigpending *queue);

/* Test if 'sig' is a valid signal. Use this instead of testing _NSIG directly */
static inline int valid_signal(unsigned long sig)
{
        return sig <= _NSIG ? 1 : 0;
}

struct timespec;
struct pt_regs;

extern int next_signal(struct sigpending *pending, sigset_t *mask);
extern int do_send_sig_info(int sig, struct siginfo *info,
                            struct task_struct *p, bool group);
extern int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p);
extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *);
extern int do_sigtimedwait(const sigset_t *, siginfo_t *,
                           const struct timespec *);
extern int sigprocmask(int, sigset_t *, sigset_t *);
extern void set_current_blocked(sigset_t *);
extern void __set_current_blocked(const sigset_t *);
extern int show_unhandled_signals;

struct sigaction {
#ifndef __ARCH_HAS_IRIX_SIGACTION
        __sighandler_t sa_handler;
        unsigned long sa_flags;
#else
        unsigned int sa_flags;
        __sighandler_t sa_handler;
#endif
#ifdef __ARCH_HAS_SA_RESTORER
        __sigrestore_t sa_restorer;
#endif
        sigset_t sa_mask;       /* mask last for extensibility */
};

struct k_sigaction {
        struct sigaction sa;
#ifdef __ARCH_HAS_KA_RESTORER
        __sigrestore_t ka_restorer;
#endif
};

#ifdef CONFIG_OLD_SIGACTION
struct old_sigaction {
        __sighandler_t sa_handler;
        old_sigset_t sa_mask;
        unsigned long sa_flags;
        __sigrestore_t sa_restorer;
};
#endif

struct ksignal {
        struct k_sigaction ka;
        siginfo_t info;
        int sig;
};

extern int get_signal(struct ksignal *ksig);
extern void signal_setup_done(int failed, struct ksignal *ksig, int stepping);
extern void exit_signals(struct task_struct *tsk);
extern void kernel_sigaction(int, __sighandler_t);

static inline void allow_signal(int sig)
{
        /*
         * Kernel threads handle their own signals. Let the signal code
         * know it'll be handled, so that they don't get converted to
         * SIGKILL or just silently dropped.
         */
        kernel_sigaction(sig, (__force __sighandler_t)2);
}

static inline void disallow_signal(int sig)
{
        kernel_sigaction(sig, SIG_IGN);
}
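
/*
 * Illustrative (hypothetical) kernel-thread usage of allow_signal(): the
 * thread opts in to SIGTERM and then polls for it from its main loop:
 *
 *    allow_signal(SIGTERM);
 *    while (!kthread_should_stop()) {
 *            ...
 *            if (signal_pending(current))
 *                    break;
 *    }
 */
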
extern struct kmem_cache *sighand_cachep;
int unhandled_signal(struct task_struct *tsk, int sig);

/*
 * In POSIX a signal is sent either to a specific thread (Linux task)
 * or to the process as a whole (Linux thread group). How the signal
 * is sent determines whether it's to one thread or the whole group,
 * which determines which signal mask(s) are involved in blocking it
 * from being delivered until later. When the signal is delivered,
 * either it's caught or ignored by a user handler or it has a default
 * effect that applies to the whole thread group (POSIX process).
 *
 * The possible effects an unblocked signal set to SIG_DFL can have are:
 *   ignore     - Nothing Happens
 *   terminate  - kill the process, i.e. all threads in the group,
 *                similar to exit_group. The group leader (only) reports
 *                WIFSIGNALED status to its parent.
 *   coredump   - write a core dump file describing all threads using
 *                the same mm and then kill all those threads
 *   stop       - stop all the threads in the group, i.e. TASK_STOPPED state
 *
 * SIGKILL and SIGSTOP cannot be caught, blocked, or ignored.
 * Other signals when not blocked and set to SIG_DFL behave as follows.
 * The job control signals also have other special effects.
 *
 *  +--------------------+------------------+
 *  |  POSIX signal      |  default action  |
 *  +--------------------+------------------+
 *  |  SIGHUP            |  terminate       |
 *  |  SIGINT            |  terminate       |
 *  |  SIGQUIT           |  coredump        |
 *  |  SIGILL            |  coredump        |
 *  |  SIGTRAP           |  coredump        |
 *  |  SIGABRT/SIGIOT    |  coredump        |
 *  |  SIGBUS            |  coredump        |
 *  |  SIGFPE            |  coredump        |
 *  |  SIGKILL           |  terminate(+)    |
 *  |  SIGUSR1           |  terminate       |
 *  |  SIGSEGV           |  coredump        |
 *  |  SIGUSR2           |  terminate       |
 *  |  SIGPIPE           |  terminate       |
 *  |  SIGALRM           |  terminate       |
 *  |  SIGTERM           |  terminate       |
 *  |  SIGCHLD           |  ignore          |
 *  |  SIGCONT           |  ignore(*)       |
 *  |  SIGSTOP           |  stop(*)(+)      |
 *  |  SIGTSTP           |  stop(*)         |
 *  |  SIGTTIN           |  stop(*)         |
 *  |  SIGTTOU           |  stop(*)         |
 *  |  SIGURG            |  ignore          |
 *  |  SIGXCPU           |  coredump        |
 *  |  SIGXFSZ           |  coredump        |
 *  |  SIGVTALRM         |  terminate       |
 *  |  SIGPROF           |  terminate       |
 *  |  SIGPOLL/SIGIO     |  terminate       |
 *  |  SIGSYS/SIGUNUSED  |  coredump        |
 *  |  SIGSTKFLT         |  terminate       |
 *  |  SIGWINCH          |  ignore          |
 *  |  SIGPWR            |  terminate       |
 *  |  SIGRTMIN-SIGRTMAX |  terminate       |
 *  +--------------------+------------------+
 *  |  non-POSIX signal  |  default action  |
 *  +--------------------+------------------+
 *  |  SIGEMT            |  coredump        |
 *  +--------------------+------------------+
 *
 * (+) For SIGKILL and SIGSTOP the action is "always", not just "default".
 * (*) Special job control effects:
 * When SIGCONT is sent, it resumes the process (all threads in the group)
 * from TASK_STOPPED state and also clears any pending/queued stop signals
 * (any of those marked with "stop(*)"). This happens regardless of blocking,
 * catching, or ignoring SIGCONT. When any stop signal is sent, it clears
 * any pending/queued SIGCONT signals; this happens regardless of blocking,
 * catching, or ignoring the stop signal, though (except for SIGSTOP) the
 * default action of stopping the process may happen later or never.
 */

#ifdef SIGEMT
#define SIGEMT_MASK rt_sigmask(SIGEMT)
#else
#define SIGEMT_MASK 0
#endif

#if SIGRTMIN > BITS_PER_LONG
#define rt_sigmask(sig) (1ULL << ((sig)-1))
#else
#define rt_sigmask(sig) sigmask(sig)
#endif

#define siginmask(sig, mask) \
        ((sig) < SIGRTMIN && (rt_sigmask(sig) & (mask)))

#define SIG_KERNEL_ONLY_MASK (\
        rt_sigmask(SIGKILL) | rt_sigmask(SIGSTOP))

#define SIG_KERNEL_STOP_MASK (\
        rt_sigmask(SIGSTOP) | rt_sigmask(SIGTSTP) | \
        rt_sigmask(SIGTTIN) | rt_sigmask(SIGTTOU) )

#define SIG_KERNEL_COREDUMP_MASK (\
        rt_sigmask(SIGQUIT) | rt_sigmask(SIGILL) | \
        rt_sigmask(SIGTRAP) | rt_sigmask(SIGABRT) | \
        rt_sigmask(SIGFPE) | rt_sigmask(SIGSEGV) | \
        rt_sigmask(SIGBUS) | rt_sigmask(SIGSYS) | \
        rt_sigmask(SIGXCPU) | rt_sigmask(SIGXFSZ) | \
        SIGEMT_MASK )

#define SIG_KERNEL_IGNORE_MASK (\
        rt_sigmask(SIGCONT) | rt_sigmask(SIGCHLD) | \
        rt_sigmask(SIGWINCH) | rt_sigmask(SIGURG) )

#define sig_kernel_only(sig)            siginmask(sig, SIG_KERNEL_ONLY_MASK)
#define sig_kernel_coredump(sig)        siginmask(sig, SIG_KERNEL_COREDUMP_MASK)
#define sig_kernel_ignore(sig)          siginmask(sig, SIG_KERNEL_IGNORE_MASK)
#define sig_kernel_stop(sig)            siginmask(sig, SIG_KERNEL_STOP_MASK)

#define sig_user_defined(t, signr) \
        (((t)->sighand->action[(signr)-1].sa.sa_handler != SIG_DFL) && \
         ((t)->sighand->action[(signr)-1].sa.sa_handler != SIG_IGN))

#define sig_fatal(t, signr) \
        (!siginmask(signr, SIG_KERNEL_IGNORE_MASK|SIG_KERNEL_STOP_MASK) && \
         (t)->sighand->action[(signr)-1].sa.sa_handler == SIG_DFL)
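
/*
 * Illustrative results of the classification macros above (documentation
 * only; the values follow directly from the masks):
 *
 *    sig_kernel_only(SIGKILL)     -> nonzero (only SIGKILL and SIGSTOP)
 *    sig_kernel_stop(SIGTSTP)     -> nonzero
 *    sig_kernel_coredump(SIGSEGV) -> nonzero
 *    sig_kernel_ignore(SIGCHLD)   -> nonzero
 *    sig_kernel_coredump(SIGUSR1) -> 0 (default action is terminate)
 *    sig_kernel_stop(SIGRTMIN)    -> 0 (siginmask() is false for any
 *                                       signal >= SIGRTMIN)
 */
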
void signals_init(void);
int restore_altstack(const stack_t __user *);
int __save_altstack(stack_t __user *, unsigned long);
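
/*
 * save_altstack_ex() runs on the signal-setup path: it copies the task's
 * alternate-stack settings into the user-space stack_t via the
 * put_user_ex() primitives (so it must be used inside a uaccess try/catch
 * region), and if SS_AUTODISARM is set it clears the task's altstack
 * settings with sas_ss_reset() so the handler can switch stacks freely;
 * the saved values are re-established on sigreturn.
 */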
#define save_altstack_ex(uss, sp) do { \
        stack_t __user *__uss = uss; \
        struct task_struct *t = current; \
        put_user_ex((void __user *)t->sas_ss_sp, &__uss->ss_sp); \
        put_user_ex(t->sas_ss_flags, &__uss->ss_flags); \
        put_user_ex(t->sas_ss_size, &__uss->ss_size); \
        if (t->sas_ss_flags & SS_AUTODISARM) \
                sas_ss_reset(t); \
} while (0);

#ifdef CONFIG_PROC_FS
struct seq_file;
extern void render_sigset_t(struct seq_file *, const char *, sigset_t *);
#endif
#endif /* _LINUX_SIGNAL_H */