mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-11-24 22:10:55 +07:00
49cb2fc42c
The main motivation to add set_tid to clone3() is CRIU. To restore a process with the same PID/TID, CRIU currently uses /proc/sys/kernel/ns_last_pid. It writes the desired (PID - 1) to ns_last_pid and then (quickly) does a clone(). This works most of the time, but it is racy. It is also slow, as it requires multiple syscalls.

Extending clone3() to support *set_tid makes it possible to restore a process using CRIU without accessing /proc/sys/kernel/ns_last_pid, and race-free (as long as the desired PID/TID is available). This clone3() extension places the same restrictions (CAP_SYS_ADMIN) on clone3() with *set_tid as are currently in place for ns_last_pid.

The original version of this change used a single value for set_tid. At the 2019 LPC, after presenting set_tid, it was, however, decided to change set_tid to an array to enable setting the PID of a process in multiple PID namespaces at the same time. If a process is created in a PID namespace, it is possible to influence the PID inside and outside of the PID namespace. Details are also in the corresponding selftest.

To create a process with the following PIDs:

    PID NS level    Requested PID
    0 (host)        31496
    1               42
    2               1

For that example, the two newly introduced parameters to struct clone_args (set_tid and set_tid_size) would need to be:

    set_tid[0] = 1;
    set_tid[1] = 42;
    set_tid[2] = 31496;
    set_tid_size = 3;

If only the PIDs of the two innermost nested PID namespaces should be defined, it would look like this:

    set_tid[0] = 1;
    set_tid[1] = 42;
    set_tid_size = 2;

The PID of the newly created process would then be the next available free PID in the PID namespace at level 0 (host), 42 in the PID namespace at level 1, and 1 in the innermost PID namespace.

The set_tid array is used to specify the PID of a process starting from the innermost nested PID namespace up to set_tid_size PID namespaces. set_tid_size cannot be larger than the current PID namespace level.
Signed-off-by: Adrian Reber <areber@redhat.com> Reviewed-by: Christian Brauner <christian.brauner@ubuntu.com> Reviewed-by: Oleg Nesterov <oleg@redhat.com> Reviewed-by: Dmitry Safonov <0x7f454c46@gmail.com> Acked-by: Andrei Vagin <avagin@gmail.com> Link: https://lore.kernel.org/r/20191115123621.142252-1-areber@redhat.com Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
106 lines
2.4 KiB
C
106 lines
2.4 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef _LINUX_PID_NS_H
|
|
#define _LINUX_PID_NS_H
|
|
|
|
#include <linux/sched.h>
|
|
#include <linux/bug.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/workqueue.h>
|
|
#include <linux/threads.h>
|
|
#include <linux/nsproxy.h>
|
|
#include <linux/kref.h>
|
|
#include <linux/ns_common.h>
|
|
#include <linux/idr.h>
|
|
|
|
/*
 * MAX_PID_NS_LEVEL is needed for limiting the size of 'struct pid'.
 */
#define MAX_PID_NS_LEVEL 32
|
|
|
|
/* Forward declaration: this header only stores a pointer to it (pid_namespace.bacct). */
struct fs_pin;
|
|
|
|
/*
 * Definitions for pid_namespace's hide_pid field.
 *
 * NOTE(review): what each level actually hides is implemented outside
 * this header; only the numeric values are fixed here.
 */
enum {
	HIDEPID_OFF	  = 0,
	HIDEPID_NO_ACCESS = 1,
	HIDEPID_INVISIBLE = 2,
};
|
|
|
|
struct pid_namespace {
|
|
struct kref kref;
|
|
struct idr idr;
|
|
struct rcu_head rcu;
|
|
unsigned int pid_allocated;
|
|
struct task_struct *child_reaper;
|
|
struct kmem_cache *pid_cachep;
|
|
unsigned int level;
|
|
struct pid_namespace *parent;
|
|
#ifdef CONFIG_PROC_FS
|
|
struct vfsmount *proc_mnt;
|
|
struct dentry *proc_self;
|
|
struct dentry *proc_thread_self;
|
|
#endif
|
|
#ifdef CONFIG_BSD_PROCESS_ACCT
|
|
struct fs_pin *bacct;
|
|
#endif
|
|
struct user_namespace *user_ns;
|
|
struct ucounts *ucounts;
|
|
struct work_struct proc_work;
|
|
kgid_t pid_gid;
|
|
int hide_pid;
|
|
int reboot; /* group exit code if this pidns was rebooted */
|
|
struct ns_common ns;
|
|
} __randomize_layout;
|
|
|
|
/*
 * Defined outside this header. With CONFIG_PID_NS, get_pid_ns() never
 * takes a kref on this namespace.
 */
extern struct pid_namespace init_pid_ns;
|
|
|
|
/*
 * Flag occupying bit 31 of an unsigned int.
 * NOTE(review): presumably stored in pid_namespace.pid_allocated - confirm
 * against the users in kernel/pid.c.
 */
#define PIDNS_ADDING (1U << 31)
|
|
|
|
#ifdef CONFIG_PID_NS
|
|
static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns)
|
|
{
|
|
if (ns != &init_pid_ns)
|
|
kref_get(&ns->kref);
|
|
return ns;
|
|
}
|
|
|
|
/*
 * Pid namespace operations that are implemented outside this header when
 * CONFIG_PID_NS is enabled. Inline stubs with the same signatures are
 * provided in the !CONFIG_PID_NS branch of this header.
 */
extern struct pid_namespace *copy_pid_ns(unsigned long flags,
	struct user_namespace *user_ns, struct pid_namespace *ns);
extern void zap_pid_ns_processes(struct pid_namespace *pid_ns);
extern int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd);
extern void put_pid_ns(struct pid_namespace *ns);
|
|
|
|
#else /* !CONFIG_PID_NS */
|
|
#include <linux/err.h>
|
|
|
|
/*
 * get_pid_ns - !CONFIG_PID_NS stub.
 * @ns: namespace pointer; never dereferenced.
 *
 * No reference counting is done in this configuration.
 *
 * Returns @ns unchanged.
 */
static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns)
{
	return ns;
}
|
|
|
|
static inline struct pid_namespace *copy_pid_ns(unsigned long flags,
|
|
struct user_namespace *user_ns, struct pid_namespace *ns)
|
|
{
|
|
if (flags & CLONE_NEWPID)
|
|
ns = ERR_PTR(-EINVAL);
|
|
return ns;
|
|
}
|
|
|
|
/*
 * put_pid_ns - !CONFIG_PID_NS stub.
 * @ns: ignored.
 *
 * Intentionally empty: the matching get_pid_ns() stub takes no reference,
 * so there is nothing to drop.
 */
static inline void put_pid_ns(struct pid_namespace *ns)
{
}
|
|
|
|
/*
 * zap_pid_ns_processes - !CONFIG_PID_NS stub.
 * @ns: ignored.
 *
 * Unconditionally hits BUG(): this header treats any call in this
 * configuration as a kernel bug.
 */
static inline void zap_pid_ns_processes(struct pid_namespace *ns)
{
	BUG();
}
|
|
|
|
/*
 * reboot_pid_ns - !CONFIG_PID_NS stub.
 * @pid_ns: ignored.
 * @cmd:    ignored.
 *
 * Always returns 0.
 */
static inline int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd)
{
	return 0;
}
|
|
#endif /* CONFIG_PID_NS */
|
|
|
|
/*
 * Declared regardless of CONFIG_PID_NS; the implementations live outside
 * this header.
 */
extern struct pid_namespace *task_active_pid_ns(struct task_struct *tsk);
void pidhash_init(void);
void pid_idr_init(void);
|
|
|
|
#endif /* _LINUX_PID_NS_H */
|