mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-28 11:18:45 +07:00
0cfb6aee70
ipc_findkey() used to scan all objects to look for the wanted key. This is slow when using a high number of keys. This change adds an rhashtable of kern_ipc_perm objects in ipc_ids, so that one lookup cease to be O(n). This change gives a 865% improvement of benchmark reaim.jobs_per_min on a 56 threads Intel(R) Xeon(R) CPU E5-2695 v3 @ 2.30GHz with 256G memory [1] Other (more micro) benchmark results, by the author: On an i5 laptop, the following loop executed right after a reboot took, without and with this change: for (int i = 0, k=0x424242; i < KEYS; ++i) semget(k++, 1, IPC_CREAT | 0600); total total max single max single KEYS without with call without call with 1 3.5 4.9 µs 3.5 4.9 10 7.6 8.6 µs 3.7 4.7 32 16.2 15.9 µs 4.3 5.3 100 72.9 41.8 µs 3.7 4.7 1000 5,630.0 502.0 µs * * 10000 1,340,000.0 7,240.0 µs * * 31900 17,600,000.0 22,200.0 µs * * *: unreliable measure: high variance The duration for a lookup-only usage was obtained by the same loop once the keys are present: total total max single max single KEYS without with call without call with 1 2.1 2.5 µs 2.1 2.5 10 4.5 4.8 µs 2.2 2.3 32 13.0 10.8 µs 2.3 2.8 100 82.9 25.1 µs * 2.3 1000 5,780.0 217.0 µs * * 10000 1,470,000.0 2,520.0 µs * * 31900 17,400,000.0 7,810.0 µs * * Finally, executing each semget() in a new process gave, when still summing only the durations of these syscalls: creation: total total KEYS without with 1 3.7 5.0 µs 10 32.9 36.7 µs 32 125.0 109.0 µs 100 523.0 353.0 µs 1000 20,300.0 3,280.0 µs 10000 2,470,000.0 46,700.0 µs 31900 27,800,000.0 219,000.0 µs lookup-only: total total KEYS without with 1 2.5 2.7 µs 10 25.4 24.4 µs 32 106.0 72.6 µs 100 591.0 352.0 µs 1000 22,400.0 2,250.0 µs 10000 2,510,000.0 25,700.0 µs 31900 28,200,000.0 115,000.0 µs [1] http://lkml.kernel.org/r/20170814060507.GE23258@yexl-desktop Link: http://lkml.kernel.org/r/20170815194954.ck32ta2z35yuzpwp@debix Signed-off-by: Guillaume Knispel <guillaume.knispel@supersonicimagine.com> Reviewed-by: Marc Pardo 
<marc.pardo@supersonicimagine.com> Cc: Davidlohr Bueso <dave@stgolabs.net> Cc: Kees Cook <keescook@chromium.org> Cc: Manfred Spraul <manfred@colorfullife.com> Cc: Alexey Dobriyan <adobriyan@gmail.com> Cc: "Eric W. Biederman" <ebiederm@xmission.com> Cc: "Peter Zijlstra (Intel)" <peterz@infradead.org> Cc: Ingo Molnar <mingo@kernel.org> Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Cc: Serge Hallyn <serge@hallyn.com> Cc: Andrey Vagin <avagin@openvz.org> Cc: Guillaume Knispel <guillaume.knispel@supersonicimagine.com> Cc: Marc Pardo <marc.pardo@supersonicimagine.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
164 lines
4.5 KiB
C
164 lines
4.5 KiB
C
#ifndef __IPC_NAMESPACE_H__
|
|
#define __IPC_NAMESPACE_H__
|
|
|
|
#include <linux/err.h>
|
|
#include <linux/idr.h>
|
|
#include <linux/rwsem.h>
|
|
#include <linux/notifier.h>
|
|
#include <linux/nsproxy.h>
|
|
#include <linux/ns_common.h>
|
|
#include <linux/refcount.h>
|
|
#include <linux/rhashtable.h>
|
|
|
|
struct user_namespace;
|
|
|
|
struct ipc_ids {
|
|
int in_use;
|
|
unsigned short seq;
|
|
bool tables_initialized;
|
|
struct rw_semaphore rwsem;
|
|
struct idr ipcs_idr;
|
|
int next_id;
|
|
struct rhashtable key_ht;
|
|
};
|
|
|
|
struct ipc_namespace {
|
|
refcount_t count;
|
|
struct ipc_ids ids[3];
|
|
|
|
int sem_ctls[4];
|
|
int used_sems;
|
|
|
|
unsigned int msg_ctlmax;
|
|
unsigned int msg_ctlmnb;
|
|
unsigned int msg_ctlmni;
|
|
atomic_t msg_bytes;
|
|
atomic_t msg_hdrs;
|
|
|
|
size_t shm_ctlmax;
|
|
size_t shm_ctlall;
|
|
unsigned long shm_tot;
|
|
int shm_ctlmni;
|
|
/*
|
|
* Defines whether IPC_RMID is forced for _all_ shm segments regardless
|
|
* of shmctl()
|
|
*/
|
|
int shm_rmid_forced;
|
|
|
|
struct notifier_block ipcns_nb;
|
|
|
|
/* The kern_mount of the mqueuefs sb. We take a ref on it */
|
|
struct vfsmount *mq_mnt;
|
|
|
|
/* # queues in this ns, protected by mq_lock */
|
|
unsigned int mq_queues_count;
|
|
|
|
/* next fields are set through sysctl */
|
|
unsigned int mq_queues_max; /* initialized to DFLT_QUEUESMAX */
|
|
unsigned int mq_msg_max; /* initialized to DFLT_MSGMAX */
|
|
unsigned int mq_msgsize_max; /* initialized to DFLT_MSGSIZEMAX */
|
|
unsigned int mq_msg_default;
|
|
unsigned int mq_msgsize_default;
|
|
|
|
/* user_ns which owns the ipc ns */
|
|
struct user_namespace *user_ns;
|
|
struct ucounts *ucounts;
|
|
|
|
struct ns_common ns;
|
|
} __randomize_layout;
|
|
|
|
extern struct ipc_namespace init_ipc_ns;
|
|
extern spinlock_t mq_lock;
|
|
|
|
#ifdef CONFIG_SYSVIPC
|
|
extern void shm_destroy_orphaned(struct ipc_namespace *ns);
|
|
#else /* CONFIG_SYSVIPC */
|
|
/* Stub used when CONFIG_SYSVIPC is not set: deliberately does nothing. */
static inline void shm_destroy_orphaned(struct ipc_namespace *ns)
{
}
|
|
#endif /* CONFIG_SYSVIPC */
|
|
|
|
#ifdef CONFIG_POSIX_MQUEUE
|
|
extern int mq_init_ns(struct ipc_namespace *ns);
|
|
/*
 * POSIX Message Queue default values:
 *
 * MIN_*: Lowest value an admin can set the maximum unprivileged limit to
 * DFLT_*MAX: Default values for the maximum unprivileged limits
 * DFLT_{MSG,MSGSIZE}: Default values used when the user doesn't supply
 *   an attribute to the open call and the queue must be created
 * HARD_*: Highest value the maximums can be set to.  These are enforced
 *   on CAP_SYS_RESOURCE apps as well making them inviolate (so make them
 *   suitably high)
 *
 * POSIX Requirements:
 *   Per app minimum openable message queues - 8.  This does not map well
 *     to the fact that we limit the number of queues on a per namespace
 *     basis instead of a per app basis.  So, make the default high enough
 *     that no given app should have a hard time opening 8 queues.
 *   Minimum maximum for HARD_MSGMAX - 32767.  I bumped this to 65536.
 *   Minimum maximum for HARD_MSGSIZEMAX - POSIX is silent on this.  However,
 *     we have run into a situation where running applications in the wild
 *     require this to be at least 5MB, and preferably 10MB, so I set the
 *     value to 16MB in hopes that this user is the worst of the bunch and
 *     the new maximum will handle anyone else.  I may have to revisit this
 *     in the future.
 */
#define DFLT_QUEUESMAX		      256
#define MIN_MSGMAX			1
#define DFLT_MSG		       10U
#define DFLT_MSGMAX		       10
#define HARD_MSGMAX		    65536
#define MIN_MSGSIZEMAX		      128
#define DFLT_MSGSIZE		     8192U
#define DFLT_MSGSIZEMAX		     8192
#define HARD_MSGSIZEMAX	    (16*1024*1024)
|
|
#else
|
|
/*
 * Stub used when CONFIG_POSIX_MQUEUE is not set: there is nothing to
 * initialize, so it always returns 0.
 */
static inline int mq_init_ns(struct ipc_namespace *ns)
{
	return 0;
}
|
|
#endif
|
|
|
|
#if defined(CONFIG_IPC_NS)
|
|
/* Out-of-line version, declared only when CONFIG_IPC_NS is enabled. */
extern struct ipc_namespace *copy_ipcs(unsigned long flags,
	struct user_namespace *user_ns, struct ipc_namespace *ns);
|
|
|
|
static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns)
|
|
{
|
|
if (ns)
|
|
refcount_inc(&ns->count);
|
|
return ns;
|
|
}
|
|
|
|
extern void put_ipc_ns(struct ipc_namespace *ns);
|
|
#else
|
|
static inline struct ipc_namespace *copy_ipcs(unsigned long flags,
|
|
struct user_namespace *user_ns, struct ipc_namespace *ns)
|
|
{
|
|
if (flags & CLONE_NEWIPC)
|
|
return ERR_PTR(-EINVAL);
|
|
|
|
return ns;
|
|
}
|
|
|
|
/*
 * Stub used when CONFIG_IPC_NS is not set: no reference is taken, @ns is
 * returned as-is (including NULL).
 */
static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns)
{
	return ns;
}
|
|
|
|
/* Stub used when CONFIG_IPC_NS is not set: deliberately does nothing. */
static inline void put_ipc_ns(struct ipc_namespace *ns)
{
}
|
|
#endif
|
|
|
|
#ifdef CONFIG_POSIX_MQUEUE_SYSCTL
|
|
|
|
struct ctl_table_header;
|
|
extern struct ctl_table_header *mq_register_sysctl_table(void);
|
|
|
|
#else /* CONFIG_POSIX_MQUEUE_SYSCTL */
|
|
|
|
/*
 * Stub used when CONFIG_POSIX_MQUEUE_SYSCTL is not set: no table is
 * registered, so the returned header is always NULL.
 */
static inline struct ctl_table_header *mq_register_sysctl_table(void)
{
	return NULL;
}
|
|
|
|
#endif /* CONFIG_POSIX_MQUEUE_SYSCTL */
|
|
#endif
|