mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-28 11:18:45 +07:00
f7bf3df8be
On large systems we'd like to allow a larger number of message queues. In some cases up to 32K. However simply setting MSGMNI to a larger value may cause problems for smaller systems. The first patch of this series introduces a default maximum number of message queue ids that scales with the amount of lowmem. Since msgmni is per namespace and there is no amount of memory dedicated to each namespace so far, the second patch of this series scales msgmni to the number of ipc namespaces too. Since msgmni depends on the amount of memory, it becomes necessary to recompute it upon memory add/remove. In the 4th patch, memory hotplug management is added: a notifier block is registered into the memory hotplug notifier chain for the ipc subsystem. Since the ipc namespaces are not linked together, they have their own notification chain: one notifier_block is defined per ipc namespace. Each time an ipc namespace is created (removed) it registers (unregisters) its notifier block in (from) the ipcns chain. The callback routine registered in the memory chain invokes the ipcns notifier chain with the IPCNS_MEMCHANGE event. Each callback routine registered in the ipcns namespace, in turn, recomputes msgmni for the owning namespace. The 5th patch makes it possible to keep the memory hotplug notifier chain's lock for a lesser amount of time: instead of directly notifying the ipcns notifier chain upon memory add/remove, a work item is added to the global workqueue. When activated, this work item is the one who notifies the ipcns notifier chain. Since msgmni depends on the number of ipc namespaces, it becomes necessary to recompute it upon ipc namespace creation / removal. The 6th patch uses the ipc namespace notifier chain for that purpose: that chain is notified each time an ipc namespace is created or removed. This makes it possible to recompute msgmni for all the namespaces each time one of them is created or removed. 
When msgmni is explicitly set from userspace, we should avoid recomputing it upon memory add/remove or ipcns creation/removal. This is what the 7th patch does: it simply unregisters the ipcns callback routine as soon as msgmni has been changed from procfs or sysctl(). Even if msgmni is set by hand, it should be possible to have it automatically recomputed again upon memory add/remove or ipcns creation/removal. This is what is achieved in patch 8: if set to a negative value, msgmni is added back to the ipcns notifier chain, making it automatically recomputed again. This patch: Compute msg_ctlmni to make it scale with the amount of lowmem. msg_ctlmni is now set to make the message queues occupy 1/32 of the available lowmem. Some cleaning has also been done for the MSGPOOL constant: the msgctl man page says it's not used, but it also defines it as a size in bytes (the code expresses it in Kbytes). Signed-off-by: Nadia Derbey <Nadia.Derbey@bull.net> Cc: Yasunori Goto <y-goto@jp.fujitsu.com> Cc: Matt Helsley <matthltc@us.ibm.com> Cc: Mingming Cao <cmm@us.ibm.com> Cc: Pierre Peiffer <pierre.peiffer@bull.net> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
113 lines
3.6 KiB
C
113 lines
3.6 KiB
C
#ifndef _LINUX_MSG_H
|
|
#define _LINUX_MSG_H
|
|
|
|
#include <linux/ipc.h>
|
|
|
|
/* ipcs ctl commands */
#define MSG_STAT	11
#define MSG_INFO	12

/* msgrcv options (octal flag bits) */
#define MSG_NOERROR	010000	/* no error if message is too big */
#define MSG_EXCEPT	020000	/* recv any msg except of specified type. */
|
|
|
|
/* Obsolete, used only for backwards compatibility and libc5 compiles */
|
|
struct msqid_ds {
|
|
struct ipc_perm msg_perm;
|
|
struct msg *msg_first; /* first message on queue,unused */
|
|
struct msg *msg_last; /* last message in queue,unused */
|
|
__kernel_time_t msg_stime; /* last msgsnd time */
|
|
__kernel_time_t msg_rtime; /* last msgrcv time */
|
|
__kernel_time_t msg_ctime; /* last change time */
|
|
unsigned long msg_lcbytes; /* Reuse junk fields for 32 bit */
|
|
unsigned long msg_lqbytes; /* ditto */
|
|
unsigned short msg_cbytes; /* current number of bytes on queue */
|
|
unsigned short msg_qnum; /* number of messages in queue */
|
|
unsigned short msg_qbytes; /* max number of bytes on queue */
|
|
__kernel_ipc_pid_t msg_lspid; /* pid of last msgsnd */
|
|
__kernel_ipc_pid_t msg_lrpid; /* last receive pid */
|
|
};
|
|
|
|
/* Include the definition of msqid64_ds */
|
|
#include <asm/msgbuf.h>
|
|
|
|
/*
 * message buffer for msgsnd and msgrcv calls
 *
 * mtext is declared with a single element; the declaration is left as-is
 * (not converted to a flexible array member) so sizeof(struct msgbuf) and
 * the field offsets stay unchanged.
 */
struct msgbuf {
	long mtype;	/* type of message */
	char mtext[1];	/* message text */
};
|
|
|
|
/*
 * buffer for msgctl calls IPC_INFO, MSG_INFO
 *
 * NOTE(review): the field names mirror the MSG* limit constants defined in
 * this header (MSGPOOL, MSGMAP, MSGMAX, ...); presumably each field reports
 * the matching limit -- confirm against the msgctl implementation.
 */
struct msginfo {
	int msgpool;
	int msgmap;
	int msgmax;
	int msgmnb;
	int msgmni;
	int msgssz;
	int msgtql;
	unsigned short msgseg;
};
|
|
|
|
/*
 * Scaling factor to compute msgmni:
 * the memory dedicated to msg queues (msgmni * msgmnb) should occupy
 * at most 1/MSG_MEM_SCALE of the lowmem (see the formula in ipc/msg.c):
 *	up to 8MB	: msgmni = 16 (MSGMNI)
 *	4 GB		: msgmni = 8K
 *	more than 16 GB	: msgmni = 32K (IPCMNI)
 */
#define MSG_MEM_SCALE 32
|
|
|
|
#define MSGMNI	   16	/* <= IPCMNI */	 /* max # of msg queue identifiers */
#define MSGMAX	 8192	/* <= INT_MAX */ /* max size of message (bytes) */
#define MSGMNB	16384	/* <= INT_MAX */ /* default max size of a message queue */

/* unused */
#define MSGPOOL	(MSGMNI * MSGMNB)	/* size in bytes of message pool */
#define MSGTQL	MSGMNB			/* number of system message headers */
#define MSGMAP	MSGMNB			/* number of entries in message map */
#define MSGSSZ	16			/* message segment size */
#define __MSGSEG (MSGPOOL / MSGSSZ)	/* max no. of segments */
/* __MSGSEG clamped to 0xffff */
#define MSGSEG	(__MSGSEG <= 0xffff ? __MSGSEG : 0xffff)
|
|
|
|
#ifdef __KERNEL__
|
|
#include <linux/list.h>
|
|
|
|
/* one msg_msg structure for each message */
|
|
struct msg_msg {
|
|
struct list_head m_list;
|
|
long m_type;
|
|
int m_ts; /* message text size */
|
|
struct msg_msgseg* next;
|
|
void *security;
|
|
/* the actual message follows immediately */
|
|
};
|
|
|
|
/* one msq_queue structure for each present queue on the system */
|
|
struct msg_queue {
|
|
struct kern_ipc_perm q_perm;
|
|
time_t q_stime; /* last msgsnd time */
|
|
time_t q_rtime; /* last msgrcv time */
|
|
time_t q_ctime; /* last change time */
|
|
unsigned long q_cbytes; /* current number of bytes on queue */
|
|
unsigned long q_qnum; /* number of messages in queue */
|
|
unsigned long q_qbytes; /* max number of bytes on queue */
|
|
pid_t q_lspid; /* pid of last msgsnd */
|
|
pid_t q_lrpid; /* last receive pid */
|
|
|
|
struct list_head q_messages;
|
|
struct list_head q_receivers;
|
|
struct list_head q_senders;
|
|
};
|
|
|
|
/* Helper routines for sys_msgsnd and sys_msgrcv */
|
|
extern long do_msgsnd(int msqid, long mtype, void __user *mtext,
|
|
size_t msgsz, int msgflg);
|
|
extern long do_msgrcv(int msqid, long *pmtype, void __user *mtext,
|
|
size_t msgsz, long msgtyp, int msgflg);
|
|
|
|
#endif /* __KERNEL__ */
|
|
|
|
#endif /* _LINUX_MSG_H */
|