mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-15 20:07:05 +07:00
a33121e548
In a case when a ptp chardev (like /dev/ptp0) is open but an underlying device is removed, closing this file leads to a race. This reproduces easily in a kvm virtual machine: ts# cat openptp0.c int main() { ... fp = fopen("/dev/ptp0", "r"); ... sleep(10); } ts# uname -r 5.5.0-rc3-46cf053e ts# cat /proc/cmdline ... slub_debug=FZP ts# modprobe ptp_kvm ts# ./openptp0 & [1] 670 opened /dev/ptp0, sleeping 10s... ts# rmmod ptp_kvm ts# ls /dev/ptp* ls: cannot access '/dev/ptp*': No such file or directory ts# ...woken up [ 48.010809] general protection fault: 0000 [#1] SMP [ 48.012502] CPU: 6 PID: 658 Comm: openptp0 Not tainted 5.5.0-rc3-46cf053e #25 [ 48.014624] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), ... [ 48.016270] RIP: 0010:module_put.part.0+0x7/0x80 [ 48.017939] RSP: 0018:ffffb3850073be00 EFLAGS: 00010202 [ 48.018339] RAX: 000000006b6b6b6b RBX: 6b6b6b6b6b6b6b6b RCX: ffff89a476c00ad0 [ 48.018936] RDX: fffff65a08d3ea08 RSI: 0000000000000247 RDI: 6b6b6b6b6b6b6b6b [ 48.019470] ... ^^^ a slub poison [ 48.023854] Call Trace: [ 48.024050] __fput+0x21f/0x240 [ 48.024288] task_work_run+0x79/0x90 [ 48.024555] do_exit+0x2af/0xab0 [ 48.024799] ? vfs_write+0x16a/0x190 [ 48.025082] do_group_exit+0x35/0x90 [ 48.025387] __x64_sys_exit_group+0xf/0x10 [ 48.025737] do_syscall_64+0x3d/0x130 [ 48.026056] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 48.026479] RIP: 0033:0x7f53b12082f6 [ 48.026792] ... [ 48.030945] Modules linked in: ptp i6300esb watchdog [last unloaded: ptp_kvm] [ 48.045001] Fixing recursive fault but reboot is needed! This happens in: static void __fput(struct file *file) { ... if (file->f_op->release) file->f_op->release(inode, file); <<< cdev is kfree'd here if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL && !(mode & FMODE_PATH))) { cdev_put(inode->i_cdev); <<< cdev fields are accessed here Namely: __fput() posix_clock_release() kref_put(&clk->kref, delete_clock) <<< the last reference delete_clock() delete_ptp_clock() kfree(ptp) <<< cdev is embedded in ptp cdev_put module_put(p->owner) <<< *p is kfree'd, bang! Here cdev is embedded in posix_clock which is embedded in ptp_clock. The race happens because ptp_clock's lifetime is controlled by two refcounts: kref and cdev.kobj in posix_clock. This is wrong. Make ptp_clock's sysfs device a parent of cdev with cdev_device_add() created especially for such cases. This way the parent device with its ptp_clock is not released until all references to the cdev are released. This adds a requirement that an initialized but not exposed struct device should be provided to posix_clock_register() by a caller instead of a simple dev_t. This approach was adopted from the commit72139dfa24
("watchdog: Fix the race between the release of watchdog_core_data and cdev"). See details of the implementation in the commit233ed09d7f
("chardev: add helper function to register char devs with a struct device"). Link: https://lore.kernel.org/linux-fsdevel/20191125125342.6189-1-vdronov@redhat.com/T/#u Analyzed-by: Stephen Johnston <sjohnsto@redhat.com> Analyzed-by: Vern Lovejoy <vlovejoy@redhat.com> Signed-off-by: Vladis Dronov <vdronov@redhat.com> Acked-by: Richard Cochran <richardcochran@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
93 lines
2.6 KiB
C
93 lines
2.6 KiB
C
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
|
/*
|
|
* PTP 1588 clock support - private declarations for the core module.
|
|
*
|
|
* Copyright (C) 2010 OMICRON electronics GmbH
|
|
*/
|
|
#ifndef _PTP_PRIVATE_H_
|
|
#define _PTP_PRIVATE_H_
|
|
|
|
#include <linux/cdev.h>
|
|
#include <linux/device.h>
|
|
#include <linux/kthread.h>
|
|
#include <linux/mutex.h>
|
|
#include <linux/posix-clock.h>
|
|
#include <linux/ptp_clock.h>
|
|
#include <linux/ptp_clock_kernel.h>
|
|
#include <linux/time.h>
|
|
|
|
#define PTP_MAX_TIMESTAMPS 128
|
|
#define PTP_BUF_TIMESTAMPS 30
|
|
|
|
struct timestamp_event_queue {
|
|
struct ptp_extts_event buf[PTP_MAX_TIMESTAMPS];
|
|
int head;
|
|
int tail;
|
|
spinlock_t lock;
|
|
};
|
|
|
|
struct ptp_clock {
|
|
struct posix_clock clock;
|
|
struct device dev;
|
|
struct ptp_clock_info *info;
|
|
dev_t devid;
|
|
int index; /* index into clocks.map */
|
|
struct pps_device *pps_source;
|
|
long dialed_frequency; /* remembers the frequency adjustment */
|
|
struct timestamp_event_queue tsevq; /* simple fifo for time stamps */
|
|
struct mutex tsevq_mux; /* one process at a time reading the fifo */
|
|
struct mutex pincfg_mux; /* protect concurrent info->pin_config access */
|
|
wait_queue_head_t tsev_wq;
|
|
int defunct; /* tells readers to go away when clock is being removed */
|
|
struct device_attribute *pin_dev_attr;
|
|
struct attribute **pin_attr;
|
|
struct attribute_group pin_attr_group;
|
|
/* 1st entry is a pointer to the real group, 2nd is NULL terminator */
|
|
const struct attribute_group *pin_attr_groups[2];
|
|
struct kthread_worker *kworker;
|
|
struct kthread_delayed_work aux_work;
|
|
};
|
|
|
|
/*
|
|
* The function queue_cnt() is safe for readers to call without
|
|
* holding q->lock. Readers use this function to verify that the queue
|
|
* is nonempty before proceeding with a dequeue operation. The fact
|
|
* that a writer might concurrently increment the tail does not
|
|
* matter, since the queue remains nonempty nonetheless.
|
|
*/
|
|
static inline int queue_cnt(struct timestamp_event_queue *q)
|
|
{
|
|
int cnt = q->tail - q->head;
|
|
return cnt < 0 ? PTP_MAX_TIMESTAMPS + cnt : cnt;
|
|
}
|
|
|
|
/*
|
|
* see ptp_chardev.c
|
|
*/
|
|
|
|
/* caller must hold pincfg_mux */
|
|
int ptp_set_pinfunc(struct ptp_clock *ptp, unsigned int pin,
|
|
enum ptp_pin_function func, unsigned int chan);
|
|
|
|
long ptp_ioctl(struct posix_clock *pc,
|
|
unsigned int cmd, unsigned long arg);
|
|
|
|
int ptp_open(struct posix_clock *pc, fmode_t fmode);
|
|
|
|
ssize_t ptp_read(struct posix_clock *pc,
|
|
uint flags, char __user *buf, size_t cnt);
|
|
|
|
__poll_t ptp_poll(struct posix_clock *pc,
|
|
struct file *fp, poll_table *wait);
|
|
|
|
/*
|
|
* see ptp_sysfs.c
|
|
*/
|
|
|
|
extern const struct attribute_group *ptp_groups[];
|
|
|
|
int ptp_populate_pin_groups(struct ptp_clock *ptp);
|
|
void ptp_cleanup_pin_groups(struct ptp_clock *ptp);
|
|
|
|
#endif
|