linux_dsm_epyc7002/drivers/connector/cn_proc.c
Li RongQing 6d2b0f02f5 connector: fix unsafe usage of ->real_parent
proc_exit_connector() uses ->real_parent lockless. This is not
safe that its parent can go away at any moment, so use RCU to
protect it, and ensure that this task is not released.

[  747.624551] ==================================================================
[  747.632946] BUG: KASAN: use-after-free in proc_exit_connector+0x1f7/0x310
[  747.640686] Read of size 4 at addr ffff88a0276988e0 by task sshd/2882
[  747.648032]
[  747.649804] CPU: 11 PID: 2882 Comm: sshd Tainted: G            E     4.19.26-rc2 #11
[  747.658629] Hardware name: IBM x3550M4 -[7914OFV]-/00AM544, BIOS -[D7E142BUS-1.71]- 07/31/2014
[  747.668419] Call Trace:
[  747.671269]  dump_stack+0xf0/0x19b
[  747.675186]  ? show_regs_print_info+0x5/0x5
[  747.679988]  ? kmsg_dump_rewind_nolock+0x59/0x59
[  747.685302]  print_address_description+0x6a/0x270
[  747.691162]  kasan_report+0x258/0x380
[  747.695835]  ? proc_exit_connector+0x1f7/0x310
[  747.701402]  proc_exit_connector+0x1f7/0x310
[  747.706767]  ? proc_coredump_connector+0x2d0/0x2d0
[  747.712715]  ? _raw_write_unlock_irq+0x29/0x50
[  747.718270]  ? _raw_write_unlock_irq+0x29/0x50
[  747.723820]  ? ___preempt_schedule+0x16/0x18
[  747.729193]  ? ___preempt_schedule+0x16/0x18
[  747.734574]  do_exit+0xa11/0x14f0
[  747.738880]  ? mm_update_next_owner+0x590/0x590
[  747.744525]  ? debug_show_all_locks+0x3c0/0x3c0
[  747.761448]  ? ktime_get_coarse_real_ts64+0xeb/0x1c0
[  747.767589]  ? lockdep_hardirqs_on+0x1a6/0x290
[  747.773154]  ? check_chain_key+0x139/0x1f0
[  747.778345]  ? check_flags.part.35+0x240/0x240
[  747.783908]  ? __lock_acquire+0x2300/0x2300
[  747.789171]  ? _raw_spin_unlock_irqrestore+0x59/0x70
[  747.795316]  ? _raw_spin_unlock_irqrestore+0x59/0x70
[  747.801457]  ? do_raw_spin_unlock+0x10f/0x1e0
[  747.806914]  ? do_raw_spin_trylock+0x120/0x120
[  747.812481]  ? preempt_count_sub+0x14/0xc0
[  747.817645]  ? _raw_spin_unlock+0x2e/0x50
[  747.822708]  ? __handle_mm_fault+0x12db/0x1fa0
[  747.828367]  ? __pmd_alloc+0x2d0/0x2d0
[  747.833143]  ? check_noncircular+0x50/0x50
[  747.838309]  ? match_held_lock+0x7f/0x340
[  747.843380]  ? check_noncircular+0x50/0x50
[  747.848561]  ? handle_mm_fault+0x21a/0x5f0
[  747.853730]  ? check_flags.part.35+0x240/0x240
[  747.859290]  ? check_chain_key+0x139/0x1f0
[  747.864474]  ? __do_page_fault+0x40f/0x760
[  747.869655]  ? __audit_syscall_entry+0x4b/0x1f0
[  747.875319]  ? syscall_trace_enter+0x1d5/0x7b0
[  747.880877]  ? trace_raw_output_preemptirq_template+0x90/0x90
[  747.887895]  ? trace_raw_output_sys_exit+0x80/0x80
[  747.893860]  ? up_read+0x3b/0x90
[  747.898142]  ? stop_critical_timings+0x260/0x260
[  747.903909]  do_group_exit+0xe0/0x1c0
[  747.908591]  ? __x64_sys_exit+0x30/0x30
[  747.913460]  ? trace_raw_output_preemptirq_template+0x90/0x90
[  747.920485]  ? tracer_hardirqs_on+0x270/0x270
[  747.925956]  __x64_sys_exit_group+0x28/0x30
[  747.931214]  do_syscall_64+0x117/0x400
[  747.935988]  ? syscall_return_slowpath+0x2f0/0x2f0
[  747.941931]  ? trace_hardirqs_off_thunk+0x1a/0x1c
[  747.947788]  ? trace_hardirqs_on_caller+0x1d0/0x1d0
[  747.953838]  ? lockdep_sys_exit+0x16/0x8e
[  747.958915]  ? trace_hardirqs_off_thunk+0x1a/0x1c
[  747.964784]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
[  747.971021] RIP: 0033:0x7f572f154c68
[  747.975606] Code: Bad RIP value.
[  747.979791] RSP: 002b:00007ffed2dfaa58 EFLAGS: 00000246 ORIG_RAX: 00000000000000e7
[  747.989324] RAX: ffffffffffffffda RBX: 00007f572f431840 RCX: 00007f572f154c68
[  747.997910] RDX: 0000000000000001 RSI: 000000000000003c RDI: 0000000000000001
[  748.006495] RBP: 0000000000000001 R08: 00000000000000e7 R09: fffffffffffffee0
[  748.015079] R10: 00007f572f4387e8 R11: 0000000000000246 R12: 00007f572f431840
[  748.023664] R13: 000055a7f90f2c50 R14: 000055a7f96e2310 R15: 000055a7f96e2310
[  748.032287]
[  748.034509] Allocated by task 2300:
[  748.038982]  kasan_kmalloc+0xa0/0xd0
[  748.043562]  kmem_cache_alloc_node+0xf5/0x2e0
[  748.049018]  copy_process+0x1781/0x4790
[  748.053884]  _do_fork+0x166/0x9a0
[  748.058163]  do_syscall_64+0x117/0x400
[  748.062943]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
[  748.069180]
[  748.071405] Freed by task 15395:
[  748.075591]  __kasan_slab_free+0x130/0x180
[  748.080752]  kmem_cache_free+0xc2/0x310
[  748.085619]  free_task+0xea/0x130
[  748.089901]  __put_task_struct+0x177/0x230
[  748.095063]  finish_task_switch+0x51b/0x5d0
[  748.100315]  __schedule+0x506/0xfa0
[  748.104791]  schedule+0xca/0x260
[  748.108978]  futex_wait_queue_me+0x27e/0x420
[  748.114333]  futex_wait+0x251/0x550
[  748.118814]  do_futex+0x75b/0xf80
[  748.123097]  __x64_sys_futex+0x231/0x2a0
[  748.128065]  do_syscall_64+0x117/0x400
[  748.132835]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
[  748.139066]
[  748.141289] The buggy address belongs to the object at ffff88a027698000
[  748.141289]  which belongs to the cache task_struct of size 12160
[  748.156589] The buggy address is located 2272 bytes inside of
[  748.156589]  12160-byte region [ffff88a027698000, ffff88a02769af80)
[  748.171114] The buggy address belongs to the page:
[  748.177055] page:ffffea00809da600 count:1 mapcount:0 mapping:ffff888107d01e00 index:0x0 compound_mapcount: 0
[  748.189136] flags: 0x57ffffc0008100(slab|head)
[  748.194688] raw: 0057ffffc0008100 ffffea00809a3200 0000000300000003 ffff888107d01e00
[  748.204424] raw: 0000000000000000 0000000000020002 00000001ffffffff 0000000000000000
[  748.214146] page dumped because: kasan: bad access detected
[  748.220976]
[  748.223197] Memory state around the buggy address:
[  748.229128]  ffff88a027698780: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[  748.238271]  ffff88a027698800: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[  748.247414] >ffff88a027698880: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[  748.256564]                                                        ^
[  748.264267]  ffff88a027698900: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[  748.273493]  ffff88a027698980: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[  748.282630] ==================================================================

Fixes: b086ff8725 ("connector: add parent pid and tgid to coredump and exit events")
Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
Signed-off-by: Li RongQing <lirongqing@baidu.com>
Acked-by: Evgeniy Polyakov <zbr@ioremap.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-03-08 15:06:38 -08:00

411 lines
11 KiB
C

/*
* cn_proc.c - process events connector
*
* Copyright (C) Matt Helsley, IBM Corp. 2005
* Based on cn_fork.c by Guillaume Thouvenin <guillaume.thouvenin@bull.net>
* Original copyright notice follows:
* Copyright (C) 2005 BULL SA.
*
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/kernel.h>
#include <linux/ktime.h>
#include <linux/init.h>
#include <linux/connector.h>
#include <linux/gfp.h>
#include <linux/ptrace.h>
#include <linux/atomic.h>
#include <linux/pid_namespace.h>
#include <linux/cn_proc.h>
/*
* Size of a cn_msg followed by a proc_event structure. Since the
* sizeof struct cn_msg is a multiple of 4 bytes, but not 8 bytes, we
* add one 4-byte word to the size here, and then start the actual
* cn_msg structure 4 bytes into the stack buffer. The result is that
* the immediately following proc_event structure is aligned to 8 bytes.
*/
#define CN_PROC_MSG_SIZE (sizeof(struct cn_msg) + sizeof(struct proc_event) + 4)
/* See comment above; we test our assumption about sizeof struct cn_msg here. */
static inline struct cn_msg *buffer_to_cn_msg(__u8 *buffer)
{
BUILD_BUG_ON(sizeof(struct cn_msg) != 20);
return (struct cn_msg *)(buffer + 4);
}
static atomic_t proc_event_num_listeners = ATOMIC_INIT(0);
static struct cb_id cn_proc_event_id = { CN_IDX_PROC, CN_VAL_PROC };
/* proc_event_counts is used as the sequence number of the netlink message */
static DEFINE_PER_CPU(__u32, proc_event_counts) = { 0 };
static inline void send_msg(struct cn_msg *msg)
{
preempt_disable();
msg->seq = __this_cpu_inc_return(proc_event_counts) - 1;
((struct proc_event *)msg->data)->cpu = smp_processor_id();
/*
* Preemption remains disabled during send to ensure the messages are
* ordered according to their sequence numbers.
*
* If cn_netlink_send() fails, the data is not sent.
*/
cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_NOWAIT);
preempt_enable();
}
void proc_fork_connector(struct task_struct *task)
{
struct cn_msg *msg;
struct proc_event *ev;
__u8 buffer[CN_PROC_MSG_SIZE] __aligned(8);
struct task_struct *parent;
if (atomic_read(&proc_event_num_listeners) < 1)
return;
msg = buffer_to_cn_msg(buffer);
ev = (struct proc_event *)msg->data;
memset(&ev->event_data, 0, sizeof(ev->event_data));
ev->timestamp_ns = ktime_get_ns();
ev->what = PROC_EVENT_FORK;
rcu_read_lock();
parent = rcu_dereference(task->real_parent);
ev->event_data.fork.parent_pid = parent->pid;
ev->event_data.fork.parent_tgid = parent->tgid;
rcu_read_unlock();
ev->event_data.fork.child_pid = task->pid;
ev->event_data.fork.child_tgid = task->tgid;
memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id));
msg->ack = 0; /* not used */
msg->len = sizeof(*ev);
msg->flags = 0; /* not used */
send_msg(msg);
}
void proc_exec_connector(struct task_struct *task)
{
struct cn_msg *msg;
struct proc_event *ev;
__u8 buffer[CN_PROC_MSG_SIZE] __aligned(8);
if (atomic_read(&proc_event_num_listeners) < 1)
return;
msg = buffer_to_cn_msg(buffer);
ev = (struct proc_event *)msg->data;
memset(&ev->event_data, 0, sizeof(ev->event_data));
ev->timestamp_ns = ktime_get_ns();
ev->what = PROC_EVENT_EXEC;
ev->event_data.exec.process_pid = task->pid;
ev->event_data.exec.process_tgid = task->tgid;
memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id));
msg->ack = 0; /* not used */
msg->len = sizeof(*ev);
msg->flags = 0; /* not used */
send_msg(msg);
}
void proc_id_connector(struct task_struct *task, int which_id)
{
struct cn_msg *msg;
struct proc_event *ev;
__u8 buffer[CN_PROC_MSG_SIZE] __aligned(8);
const struct cred *cred;
if (atomic_read(&proc_event_num_listeners) < 1)
return;
msg = buffer_to_cn_msg(buffer);
ev = (struct proc_event *)msg->data;
memset(&ev->event_data, 0, sizeof(ev->event_data));
ev->what = which_id;
ev->event_data.id.process_pid = task->pid;
ev->event_data.id.process_tgid = task->tgid;
rcu_read_lock();
cred = __task_cred(task);
if (which_id == PROC_EVENT_UID) {
ev->event_data.id.r.ruid = from_kuid_munged(&init_user_ns, cred->uid);
ev->event_data.id.e.euid = from_kuid_munged(&init_user_ns, cred->euid);
} else if (which_id == PROC_EVENT_GID) {
ev->event_data.id.r.rgid = from_kgid_munged(&init_user_ns, cred->gid);
ev->event_data.id.e.egid = from_kgid_munged(&init_user_ns, cred->egid);
} else {
rcu_read_unlock();
return;
}
rcu_read_unlock();
ev->timestamp_ns = ktime_get_ns();
memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id));
msg->ack = 0; /* not used */
msg->len = sizeof(*ev);
msg->flags = 0; /* not used */
send_msg(msg);
}
void proc_sid_connector(struct task_struct *task)
{
struct cn_msg *msg;
struct proc_event *ev;
__u8 buffer[CN_PROC_MSG_SIZE] __aligned(8);
if (atomic_read(&proc_event_num_listeners) < 1)
return;
msg = buffer_to_cn_msg(buffer);
ev = (struct proc_event *)msg->data;
memset(&ev->event_data, 0, sizeof(ev->event_data));
ev->timestamp_ns = ktime_get_ns();
ev->what = PROC_EVENT_SID;
ev->event_data.sid.process_pid = task->pid;
ev->event_data.sid.process_tgid = task->tgid;
memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id));
msg->ack = 0; /* not used */
msg->len = sizeof(*ev);
msg->flags = 0; /* not used */
send_msg(msg);
}
void proc_ptrace_connector(struct task_struct *task, int ptrace_id)
{
struct cn_msg *msg;
struct proc_event *ev;
__u8 buffer[CN_PROC_MSG_SIZE] __aligned(8);
if (atomic_read(&proc_event_num_listeners) < 1)
return;
msg = buffer_to_cn_msg(buffer);
ev = (struct proc_event *)msg->data;
memset(&ev->event_data, 0, sizeof(ev->event_data));
ev->timestamp_ns = ktime_get_ns();
ev->what = PROC_EVENT_PTRACE;
ev->event_data.ptrace.process_pid = task->pid;
ev->event_data.ptrace.process_tgid = task->tgid;
if (ptrace_id == PTRACE_ATTACH) {
ev->event_data.ptrace.tracer_pid = current->pid;
ev->event_data.ptrace.tracer_tgid = current->tgid;
} else if (ptrace_id == PTRACE_DETACH) {
ev->event_data.ptrace.tracer_pid = 0;
ev->event_data.ptrace.tracer_tgid = 0;
} else
return;
memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id));
msg->ack = 0; /* not used */
msg->len = sizeof(*ev);
msg->flags = 0; /* not used */
send_msg(msg);
}
void proc_comm_connector(struct task_struct *task)
{
struct cn_msg *msg;
struct proc_event *ev;
__u8 buffer[CN_PROC_MSG_SIZE] __aligned(8);
if (atomic_read(&proc_event_num_listeners) < 1)
return;
msg = buffer_to_cn_msg(buffer);
ev = (struct proc_event *)msg->data;
memset(&ev->event_data, 0, sizeof(ev->event_data));
ev->timestamp_ns = ktime_get_ns();
ev->what = PROC_EVENT_COMM;
ev->event_data.comm.process_pid = task->pid;
ev->event_data.comm.process_tgid = task->tgid;
get_task_comm(ev->event_data.comm.comm, task);
memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id));
msg->ack = 0; /* not used */
msg->len = sizeof(*ev);
msg->flags = 0; /* not used */
send_msg(msg);
}
void proc_coredump_connector(struct task_struct *task)
{
struct cn_msg *msg;
struct proc_event *ev;
struct task_struct *parent;
__u8 buffer[CN_PROC_MSG_SIZE] __aligned(8);
if (atomic_read(&proc_event_num_listeners) < 1)
return;
msg = buffer_to_cn_msg(buffer);
ev = (struct proc_event *)msg->data;
memset(&ev->event_data, 0, sizeof(ev->event_data));
ev->timestamp_ns = ktime_get_ns();
ev->what = PROC_EVENT_COREDUMP;
ev->event_data.coredump.process_pid = task->pid;
ev->event_data.coredump.process_tgid = task->tgid;
rcu_read_lock();
if (pid_alive(task)) {
parent = rcu_dereference(task->real_parent);
ev->event_data.coredump.parent_pid = parent->pid;
ev->event_data.coredump.parent_tgid = parent->tgid;
}
rcu_read_unlock();
memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id));
msg->ack = 0; /* not used */
msg->len = sizeof(*ev);
msg->flags = 0; /* not used */
send_msg(msg);
}
void proc_exit_connector(struct task_struct *task)
{
struct cn_msg *msg;
struct proc_event *ev;
struct task_struct *parent;
__u8 buffer[CN_PROC_MSG_SIZE] __aligned(8);
if (atomic_read(&proc_event_num_listeners) < 1)
return;
msg = buffer_to_cn_msg(buffer);
ev = (struct proc_event *)msg->data;
memset(&ev->event_data, 0, sizeof(ev->event_data));
ev->timestamp_ns = ktime_get_ns();
ev->what = PROC_EVENT_EXIT;
ev->event_data.exit.process_pid = task->pid;
ev->event_data.exit.process_tgid = task->tgid;
ev->event_data.exit.exit_code = task->exit_code;
ev->event_data.exit.exit_signal = task->exit_signal;
rcu_read_lock();
if (pid_alive(task)) {
parent = rcu_dereference(task->real_parent);
ev->event_data.exit.parent_pid = parent->pid;
ev->event_data.exit.parent_tgid = parent->tgid;
}
rcu_read_unlock();
memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id));
msg->ack = 0; /* not used */
msg->len = sizeof(*ev);
msg->flags = 0; /* not used */
send_msg(msg);
}
/*
* Send an acknowledgement message to userspace
*
* Use 0 for success, EFOO otherwise.
* Note: this is the negative of conventional kernel error
* values because it's not being returned via syscall return
* mechanisms.
*/
static void cn_proc_ack(int err, int rcvd_seq, int rcvd_ack)
{
struct cn_msg *msg;
struct proc_event *ev;
__u8 buffer[CN_PROC_MSG_SIZE] __aligned(8);
if (atomic_read(&proc_event_num_listeners) < 1)
return;
msg = buffer_to_cn_msg(buffer);
ev = (struct proc_event *)msg->data;
memset(&ev->event_data, 0, sizeof(ev->event_data));
msg->seq = rcvd_seq;
ev->timestamp_ns = ktime_get_ns();
ev->cpu = -1;
ev->what = PROC_EVENT_NONE;
ev->event_data.ack.err = err;
memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id));
msg->ack = rcvd_ack + 1;
msg->len = sizeof(*ev);
msg->flags = 0; /* not used */
send_msg(msg);
}
/**
* cn_proc_mcast_ctl
* @data: message sent from userspace via the connector
*/
static void cn_proc_mcast_ctl(struct cn_msg *msg,
struct netlink_skb_parms *nsp)
{
enum proc_cn_mcast_op *mc_op = NULL;
int err = 0;
if (msg->len != sizeof(*mc_op))
return;
/*
* Events are reported with respect to the initial pid
* and user namespaces so ignore requestors from
* other namespaces.
*/
if ((current_user_ns() != &init_user_ns) ||
(task_active_pid_ns(current) != &init_pid_ns))
return;
/* Can only change if privileged. */
if (!__netlink_ns_capable(nsp, &init_user_ns, CAP_NET_ADMIN)) {
err = EPERM;
goto out;
}
mc_op = (enum proc_cn_mcast_op *)msg->data;
switch (*mc_op) {
case PROC_CN_MCAST_LISTEN:
atomic_inc(&proc_event_num_listeners);
break;
case PROC_CN_MCAST_IGNORE:
atomic_dec(&proc_event_num_listeners);
break;
default:
err = EINVAL;
break;
}
out:
cn_proc_ack(err, msg->seq, msg->ack);
}
/*
* cn_proc_init - initialization entry point
*
* Adds the connector callback to the connector driver.
*/
static int __init cn_proc_init(void)
{
int err = cn_add_callback(&cn_proc_event_id,
"cn_proc",
&cn_proc_mcast_ctl);
if (err) {
pr_warn("cn_proc failed to register\n");
return err;
}
return 0;
}
device_initcall(cn_proc_init);