mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-25 05:59:34 +07:00
df0108c5da
Currently, epoll file descriptors or epfds (the fd returned from epoll_create[1]()) that are added to a shared wakeup source are always added in a non-exclusive manner. This means that when we have multiple epfds attached to a shared fd source they are all woken up. This creates thundering herd type behavior. Introduce a new 'EPOLLEXCLUSIVE' flag that can be passed as part of the 'event' argument during an epoll_ctl() EPOLL_CTL_ADD operation. This new flag allows for exclusive wakeups when there are multiple epfds attached to a shared fd event source. The implementation walks the list of exclusive waiters, and queues an event to each epfd, until it finds the first waiter that has threads blocked on it via epoll_wait(). The idea is to search for threads which are idle and ready to process the wakeup events. Thus, we queue an event to at least 1 epfd, but may still potentially queue an event to all epfds that are attached to the shared fd source. Performance testing was done by Madars Vitolins using a modified version of Enduro/X. The use of the 'EPOLLEXCLUSIVE' flag reduced the length of this particular workload from 860s down to 24s. Sample epoll_ctl text: EPOLLEXCLUSIVE Sets an exclusive wakeup mode for the epfd file descriptor that is being attached to the target file descriptor, fd. Thus, when an event occurs and multiple epfd file descriptors are attached to the same target file using EPOLLEXCLUSIVE, one or more epfds will receive an event with epoll_wait(2). The default in this scenario (when EPOLLEXCLUSIVE is not set) is for all epfds to receive an event. EPOLLEXCLUSIVE may only be specified with the op EPOLL_CTL_ADD.
Signed-off-by: Jason Baron <jbaron@akamai.com> Tested-by: Madars Vitolins <m@silodev.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Al Viro <viro@ftp.linux.org.uk> Cc: Michael Kerrisk <mtk.manpages@gmail.com> Cc: Eric Wong <normalperson@yhbt.net> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Andy Lutomirski <luto@amacapital.net> Cc: Hagen Paul Pfeifer <hagen@jauu.net> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
80 lines
2.1 KiB
C
80 lines
2.1 KiB
C
/*
|
|
* include/linux/eventpoll.h ( Efficient event polling implementation )
|
|
* Copyright (C) 2001,...,2006 Davide Libenzi
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* Davide Libenzi <davidel@xmailserver.org>
|
|
*
|
|
*/
|
|
|
|
#ifndef _UAPI_LINUX_EVENTPOLL_H
|
|
#define _UAPI_LINUX_EVENTPOLL_H
|
|
|
|
/* For O_CLOEXEC */
|
|
#include <linux/fcntl.h>
|
|
#include <linux/types.h>
|
|
|
|
/*
 * Flags for epoll_create1.
 * EPOLL_CLOEXEC is simply an alias for O_CLOEXEC, which is why
 * <linux/fcntl.h> is included by this header.
 */
|
|
#define EPOLL_CLOEXEC O_CLOEXEC
|
|
|
|
/*
 * Valid opcodes to issue to sys_epoll_ctl().
 * These are plain values (1, 2, 3), not independent bit flags:
 * EPOLL_CTL_MOD equals EPOLL_CTL_ADD | EPOLL_CTL_DEL numerically, so the
 * opcodes must be compared for equality rather than tested with '&'.
 */
|
|
#define EPOLL_CTL_ADD 1
|
|
#define EPOLL_CTL_DEL 2
|
|
#define EPOLL_CTL_MOD 3
|
|
|
|
/*
 * Behaviour flags carried in the upper bits of epoll_event.events.
 *
 * Every flag is built from an unsigned constant.  Writing (1 << 31) shifts
 * a signed int into its sign bit, which is undefined behaviour in C and in
 * practice produces a negative value that sign-extends when widened to a
 * 64-bit type.  Using 1U keeps all four flags the same (unsigned) type and
 * matches the __u32 'events' field they are combined with.
 */

/* Set exclusive wakeup mode for the target file descriptor */
#define EPOLLEXCLUSIVE (1U << 28)

/*
 * Request the handling of system wakeup events so as to prevent system suspends
 * from happening while those events are being processed.
 *
 * Assuming neither EPOLLET nor EPOLLONESHOT is set, system suspends will not be
 * re-allowed until epoll_wait is called again after consuming the wakeup
 * event(s).
 *
 * Requires CAP_BLOCK_SUSPEND
 */
#define EPOLLWAKEUP (1U << 29)

/* Set the One Shot behaviour for the target file descriptor */
#define EPOLLONESHOT (1U << 30)

/* Set the Edge Triggered behaviour for the target file descriptor */
#define EPOLLET (1U << 31)
|
|
|
|
/*
 * On x86-64 make the 64bit structure have the same alignment as the
 * 32bit structure. This makes 32bit emulation easier.
 *
 * UML/x86_64 needs the same packing as x86_64
 *
 * EPOLL_PACKED therefore expands to __attribute__((packed)) when
 * __x86_64__ is defined and to nothing on every other target.
 */
|
|
#ifdef __x86_64__
|
|
#define EPOLL_PACKED __attribute__((packed))
|
|
#else /* !__x86_64__ */
|
|
#define EPOLL_PACKED
|
|
#endif /* __x86_64__ */
|
|
|
|
/*
 * struct epoll_event - one event record: a 32-bit mask plus 64 bits of data.
 *
 * The trailing EPOLL_PACKED removes the padding that would otherwise sit
 * between the 32-bit and the 64-bit member on x86-64 (see the comment on
 * EPOLL_PACKED).  Field order and widths are part of the layout; do not
 * reorder or resize them.
 */
struct epoll_event {
|
|
__u32 events; /* bit mask; the EPOLL* flags defined in this header (e.g. EPOLLWAKEUP) are set/cleared here */
|
|
__u64 data; /* NOTE(review): presumably an opaque caller-supplied value; its use is not visible in this header */
|
|
} EPOLL_PACKED;
|
|
|
|
#ifdef __KERNEL__
/*
 * ep_take_care_of_epollwakeup - drop EPOLLWAKEUP when it cannot be honoured.
 * @epev: event whose ->events mask is adjusted in place.
 *
 * With CONFIG_PM_SLEEP the flag is kept only if the caller passes
 * capable(CAP_BLOCK_SUSPEND); otherwise it is cleared.  Without
 * CONFIG_PM_SLEEP the flag is cleared unconditionally.  No other bit of
 * ->events is touched, and nothing is returned.
 *
 * The helper is kernel-internal: it depends on a CONFIG_ symbol and on
 * capable()/CAP_BLOCK_SUSPEND, none of which this header provides.  It is
 * therefore fenced with __KERNEL__ so that it is not exposed to (or
 * compiled by) userspace consumers of this exported header.
 */
#ifdef CONFIG_PM_SLEEP
static inline void ep_take_care_of_epollwakeup(struct epoll_event *epev)
{
	if ((epev->events & EPOLLWAKEUP) && !capable(CAP_BLOCK_SUSPEND))
		epev->events &= ~EPOLLWAKEUP;
}
#else
static inline void ep_take_care_of_epollwakeup(struct epoll_event *epev)
{
	epev->events &= ~EPOLLWAKEUP;
}
#endif /* CONFIG_PM_SLEEP */
#endif /* __KERNEL__ */
|
|
#endif /* _UAPI_LINUX_EVENTPOLL_H */
|