mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-21 20:08:52 +07:00
fa10fed30f
This patch allows to have multiple procfs instances inside the same pid namespace. The aim here is lightweight sandboxes, and to allow that we have to modernize procfs internals. 1) The main aim of this work is to have on embedded systems one supervisor for apps. Right now we have some lightweight sandbox support, however if we create pid namespacess we have to manages all the processes inside too, where our goal is to be able to run a bunch of apps each one inside its own mount namespace without being able to notice each other. We only want to use mount namespaces, and we want procfs to behave more like a real mount point. 2) Linux Security Modules have multiple ptrace paths inside some subsystems, however inside procfs, the implementation does not guarantee that the ptrace() check which triggers the security_ptrace_check() hook will always run. We have the 'hidepid' mount option that can be used to force the ptrace_may_access() check inside has_pid_permissions() to run. The problem is that 'hidepid' is per pid namespace and not attached to the mount point, any remount or modification of 'hidepid' will propagate to all other procfs mounts. This also does not allow to support Yama LSM easily in desktop and user sessions. Yama ptrace scope which restricts ptrace and some other syscalls to be allowed only on inferiors, can be updated to have a per-task context, where the context will be inherited during fork(), clone() and preserved across execve(). If we support multiple private procfs instances, then we may force the ptrace_may_access() on /proc/<pids>/ to always run inside that new procfs instances. This will allow to specifiy on user sessions if we should populate procfs with pids that the user can ptrace or not. By using Yama ptrace scope, some restricted users will only be able to see inferiors inside /proc, they won't even be able to see their other processes. Some software like Chromium, Firefox's crash handler, Wine and others are already using Yama to restrict which processes can be ptracable. With this change this will give the possibility to restrict /proc/<pids>/ but more importantly this will give desktop users a generic and usuable way to specifiy which users should see all processes and which users can not. Side notes: * This covers the lack of seccomp where it is not able to parse arguments, it is easy to install a seccomp filter on direct syscalls that operate on pids, however /proc/<pid>/ is a Linux ABI using filesystem syscalls. With this change LSMs should be able to analyze open/read/write/close... In the new patch set version I removed the 'newinstance' option as suggested by Eric W. Biederman. Selftest has been added to verify new behavior. Signed-off-by: Alexey Gladkov <gladkov.alexey@gmail.com> Reviewed-by: Alexey Dobriyan <adobriyan@gmail.com> Reviewed-by: Kees Cook <keescook@chromium.org> Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
92 lines
2.1 KiB
C
92 lines
2.1 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef _LINUX_PID_NS_H
|
|
#define _LINUX_PID_NS_H
|
|
|
|
#include <linux/sched.h>
|
|
#include <linux/bug.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/workqueue.h>
|
|
#include <linux/threads.h>
|
|
#include <linux/nsproxy.h>
|
|
#include <linux/kref.h>
|
|
#include <linux/ns_common.h>
|
|
#include <linux/idr.h>
|
|
|
|
/* MAX_PID_NS_LEVEL is needed for limiting size of 'struct pid' */
|
|
#define MAX_PID_NS_LEVEL 32
|
|
|
|
struct fs_pin;
|
|
|
|
struct pid_namespace {
|
|
struct kref kref;
|
|
struct idr idr;
|
|
struct rcu_head rcu;
|
|
unsigned int pid_allocated;
|
|
struct task_struct *child_reaper;
|
|
struct kmem_cache *pid_cachep;
|
|
unsigned int level;
|
|
struct pid_namespace *parent;
|
|
#ifdef CONFIG_BSD_PROCESS_ACCT
|
|
struct fs_pin *bacct;
|
|
#endif
|
|
struct user_namespace *user_ns;
|
|
struct ucounts *ucounts;
|
|
int reboot; /* group exit code if this pidns was rebooted */
|
|
struct ns_common ns;
|
|
} __randomize_layout;
|
|
|
|
extern struct pid_namespace init_pid_ns;
|
|
|
|
#define PIDNS_ADDING (1U << 31)
|
|
|
|
#ifdef CONFIG_PID_NS
|
|
static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns)
|
|
{
|
|
if (ns != &init_pid_ns)
|
|
kref_get(&ns->kref);
|
|
return ns;
|
|
}
|
|
|
|
extern struct pid_namespace *copy_pid_ns(unsigned long flags,
|
|
struct user_namespace *user_ns, struct pid_namespace *ns);
|
|
extern void zap_pid_ns_processes(struct pid_namespace *pid_ns);
|
|
extern int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd);
|
|
extern void put_pid_ns(struct pid_namespace *ns);
|
|
|
|
#else /* !CONFIG_PID_NS */
|
|
#include <linux/err.h>
|
|
|
|
static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns)
|
|
{
|
|
return ns;
|
|
}
|
|
|
|
static inline struct pid_namespace *copy_pid_ns(unsigned long flags,
|
|
struct user_namespace *user_ns, struct pid_namespace *ns)
|
|
{
|
|
if (flags & CLONE_NEWPID)
|
|
ns = ERR_PTR(-EINVAL);
|
|
return ns;
|
|
}
|
|
|
|
static inline void put_pid_ns(struct pid_namespace *ns)
|
|
{
|
|
}
|
|
|
|
static inline void zap_pid_ns_processes(struct pid_namespace *ns)
|
|
{
|
|
BUG();
|
|
}
|
|
|
|
static inline int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd)
|
|
{
|
|
return 0;
|
|
}
|
|
#endif /* CONFIG_PID_NS */
|
|
|
|
extern struct pid_namespace *task_active_pid_ns(struct task_struct *tsk);
|
|
void pidhash_init(void);
|
|
void pid_idr_init(void);
|
|
|
|
#endif /* _LINUX_PID_NS_H */
|