mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-16 05:56:45 +07:00
a33121e548
In a case when a ptp chardev (like /dev/ptp0) is open but an underlying
device is removed, closing this file leads to a race. This reproduces
easily in a kvm virtual machine:

ts# cat openptp0.c
int main() { ... fp = fopen("/dev/ptp0", "r"); ... sleep(10); }
ts# uname -r
5.5.0-rc3-46cf053e
ts# cat /proc/cmdline
... slub_debug=FZP
ts# modprobe ptp_kvm
ts# ./openptp0 &
[1] 670
opened /dev/ptp0, sleeping 10s...
ts# rmmod ptp_kvm
ts# ls /dev/ptp*
ls: cannot access '/dev/ptp*': No such file or directory
ts# ...woken up
[ 48.010809] general protection fault: 0000 [#1] SMP
[ 48.012502] CPU: 6 PID: 658 Comm: openptp0 Not tainted 5.5.0-rc3-46cf053e #25
[ 48.014624] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), ...
[ 48.016270] RIP: 0010:module_put.part.0+0x7/0x80
[ 48.017939] RSP: 0018:ffffb3850073be00 EFLAGS: 00010202
[ 48.018339] RAX: 000000006b6b6b6b RBX: 6b6b6b6b6b6b6b6b RCX: ffff89a476c00ad0
[ 48.018936] RDX: fffff65a08d3ea08 RSI: 0000000000000247 RDI: 6b6b6b6b6b6b6b6b
[ 48.019470] ... ^^^ a slub poison
[ 48.023854] Call Trace:
[ 48.024050] __fput+0x21f/0x240
[ 48.024288] task_work_run+0x79/0x90
[ 48.024555] do_exit+0x2af/0xab0
[ 48.024799] ? vfs_write+0x16a/0x190
[ 48.025082] do_group_exit+0x35/0x90
[ 48.025387] __x64_sys_exit_group+0xf/0x10
[ 48.025737] do_syscall_64+0x3d/0x130
[ 48.026056] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 48.026479] RIP: 0033:0x7f53b12082f6
[ 48.026792] ...
[ 48.030945] Modules linked in: ptp i6300esb watchdog [last unloaded: ptp_kvm]
[ 48.045001] Fixing recursive fault but reboot is needed!

This happens in:

static void __fput(struct file *file)
{ ...
    if (file->f_op->release)
        file->f_op->release(inode, file); <<< cdev is kfree'd here
    if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL &&
                 !(mode & FMODE_PATH))) {
        cdev_put(inode->i_cdev); <<< cdev fields are accessed here

Namely:

__fput()
  posix_clock_release()
    kref_put(&clk->kref, delete_clock) <<< the last reference
      delete_clock()
        delete_ptp_clock()
          kfree(ptp) <<< cdev is embedded in ptp
  cdev_put
    module_put(p->owner) <<< *p is kfree'd, bang!

Here cdev is embedded in posix_clock which is embedded in ptp_clock.
The race happens because ptp_clock's lifetime is controlled by two
refcounts: kref and cdev.kobj in posix_clock. This is wrong.

Make ptp_clock's sysfs device a parent of cdev with cdev_device_add()
created especially for such cases. This way the parent device with its
ptp_clock is not released until all references to the cdev are released.
This adds a requirement that an initialized but not exposed struct
device should be provided to posix_clock_register() by a caller instead
of a simple dev_t.

This approach was adopted from the commit 72139dfa24
("watchdog: Fix the race between the release of watchdog_core_data and
cdev"). See details of the implementation in the commit 233ed09d7f
("chardev: add helper function to register char devs with a struct
device").

Link: https://lore.kernel.org/linux-fsdevel/20191125125342.6189-1-vdronov@redhat.com/T/#u
Analyzed-by: Stephen Johnston <sjohnsto@redhat.com>
Analyzed-by: Vern Lovejoy <vlovejoy@redhat.com>
Signed-off-by: Vladis Dronov <vdronov@redhat.com>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
411 lines
9.3 KiB
C
411 lines
9.3 KiB
C
// SPDX-License-Identifier: GPL-2.0-or-later
|
|
/*
|
|
* PTP 1588 clock support
|
|
*
|
|
* Copyright (C) 2010 OMICRON electronics GmbH
|
|
*/
|
|
#include <linux/idr.h>
|
|
#include <linux/device.h>
|
|
#include <linux/err.h>
|
|
#include <linux/init.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/module.h>
|
|
#include <linux/posix-clock.h>
|
|
#include <linux/pps_kernel.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/syscalls.h>
|
|
#include <linux/uaccess.h>
|
|
#include <uapi/linux/sched/types.h>
|
|
|
|
#include "ptp_private.h"
|
|
|
|
#define PTP_MAX_ALARMS 4
|
|
#define PTP_PPS_DEFAULTS (PPS_CAPTUREASSERT | PPS_OFFSETASSERT)
|
|
#define PTP_PPS_EVENT PPS_CAPTUREASSERT
|
|
#define PTP_PPS_MODE (PTP_PPS_DEFAULTS | PPS_CANWAIT | PPS_TSFMT_TSPEC)
|
|
|
|
/* private globals */
|
|
|
|
static dev_t ptp_devt;
|
|
static struct class *ptp_class;
|
|
|
|
static DEFINE_IDA(ptp_clocks_map);
|
|
|
|
/* time stamp event queue operations */
|
|
|
|
static inline int queue_free(struct timestamp_event_queue *q)
|
|
{
|
|
return PTP_MAX_TIMESTAMPS - queue_cnt(q) - 1;
|
|
}
|
|
|
|
static void enqueue_external_timestamp(struct timestamp_event_queue *queue,
|
|
struct ptp_clock_event *src)
|
|
{
|
|
struct ptp_extts_event *dst;
|
|
unsigned long flags;
|
|
s64 seconds;
|
|
u32 remainder;
|
|
|
|
seconds = div_u64_rem(src->timestamp, 1000000000, &remainder);
|
|
|
|
spin_lock_irqsave(&queue->lock, flags);
|
|
|
|
dst = &queue->buf[queue->tail];
|
|
dst->index = src->index;
|
|
dst->t.sec = seconds;
|
|
dst->t.nsec = remainder;
|
|
|
|
if (!queue_free(queue))
|
|
queue->head = (queue->head + 1) % PTP_MAX_TIMESTAMPS;
|
|
|
|
queue->tail = (queue->tail + 1) % PTP_MAX_TIMESTAMPS;
|
|
|
|
spin_unlock_irqrestore(&queue->lock, flags);
|
|
}
|
|
|
|
s32 scaled_ppm_to_ppb(long ppm)
|
|
{
|
|
/*
|
|
* The 'freq' field in the 'struct timex' is in parts per
|
|
* million, but with a 16 bit binary fractional field.
|
|
*
|
|
* We want to calculate
|
|
*
|
|
* ppb = scaled_ppm * 1000 / 2^16
|
|
*
|
|
* which simplifies to
|
|
*
|
|
* ppb = scaled_ppm * 125 / 2^13
|
|
*/
|
|
s64 ppb = 1 + ppm;
|
|
ppb *= 125;
|
|
ppb >>= 13;
|
|
return (s32) ppb;
|
|
}
|
|
EXPORT_SYMBOL(scaled_ppm_to_ppb);
|
|
|
|
/* posix clock implementation */
|
|
|
|
static int ptp_clock_getres(struct posix_clock *pc, struct timespec64 *tp)
|
|
{
|
|
tp->tv_sec = 0;
|
|
tp->tv_nsec = 1;
|
|
return 0;
|
|
}
|
|
|
|
static int ptp_clock_settime(struct posix_clock *pc, const struct timespec64 *tp)
|
|
{
|
|
struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock);
|
|
|
|
return ptp->info->settime64(ptp->info, tp);
|
|
}
|
|
|
|
static int ptp_clock_gettime(struct posix_clock *pc, struct timespec64 *tp)
|
|
{
|
|
struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock);
|
|
int err;
|
|
|
|
if (ptp->info->gettimex64)
|
|
err = ptp->info->gettimex64(ptp->info, tp, NULL);
|
|
else
|
|
err = ptp->info->gettime64(ptp->info, tp);
|
|
return err;
|
|
}
|
|
|
|
static int ptp_clock_adjtime(struct posix_clock *pc, struct __kernel_timex *tx)
|
|
{
|
|
struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock);
|
|
struct ptp_clock_info *ops;
|
|
int err = -EOPNOTSUPP;
|
|
|
|
ops = ptp->info;
|
|
|
|
if (tx->modes & ADJ_SETOFFSET) {
|
|
struct timespec64 ts;
|
|
ktime_t kt;
|
|
s64 delta;
|
|
|
|
ts.tv_sec = tx->time.tv_sec;
|
|
ts.tv_nsec = tx->time.tv_usec;
|
|
|
|
if (!(tx->modes & ADJ_NANO))
|
|
ts.tv_nsec *= 1000;
|
|
|
|
if ((unsigned long) ts.tv_nsec >= NSEC_PER_SEC)
|
|
return -EINVAL;
|
|
|
|
kt = timespec64_to_ktime(ts);
|
|
delta = ktime_to_ns(kt);
|
|
err = ops->adjtime(ops, delta);
|
|
} else if (tx->modes & ADJ_FREQUENCY) {
|
|
s32 ppb = scaled_ppm_to_ppb(tx->freq);
|
|
if (ppb > ops->max_adj || ppb < -ops->max_adj)
|
|
return -ERANGE;
|
|
if (ops->adjfine)
|
|
err = ops->adjfine(ops, tx->freq);
|
|
else
|
|
err = ops->adjfreq(ops, ppb);
|
|
ptp->dialed_frequency = tx->freq;
|
|
} else if (tx->modes == 0) {
|
|
tx->freq = ptp->dialed_frequency;
|
|
err = 0;
|
|
}
|
|
|
|
return err;
|
|
}
|
|
|
|
static struct posix_clock_operations ptp_clock_ops = {
|
|
.owner = THIS_MODULE,
|
|
.clock_adjtime = ptp_clock_adjtime,
|
|
.clock_gettime = ptp_clock_gettime,
|
|
.clock_getres = ptp_clock_getres,
|
|
.clock_settime = ptp_clock_settime,
|
|
.ioctl = ptp_ioctl,
|
|
.open = ptp_open,
|
|
.poll = ptp_poll,
|
|
.read = ptp_read,
|
|
};
|
|
|
|
static void ptp_clock_release(struct device *dev)
|
|
{
|
|
struct ptp_clock *ptp = container_of(dev, struct ptp_clock, dev);
|
|
|
|
mutex_destroy(&ptp->tsevq_mux);
|
|
mutex_destroy(&ptp->pincfg_mux);
|
|
ida_simple_remove(&ptp_clocks_map, ptp->index);
|
|
kfree(ptp);
|
|
}
|
|
|
|
static void ptp_aux_kworker(struct kthread_work *work)
|
|
{
|
|
struct ptp_clock *ptp = container_of(work, struct ptp_clock,
|
|
aux_work.work);
|
|
struct ptp_clock_info *info = ptp->info;
|
|
long delay;
|
|
|
|
delay = info->do_aux_work(info);
|
|
|
|
if (delay >= 0)
|
|
kthread_queue_delayed_work(ptp->kworker, &ptp->aux_work, delay);
|
|
}
|
|
|
|
/* public interface */
|
|
|
|
struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
|
|
struct device *parent)
|
|
{
|
|
struct ptp_clock *ptp;
|
|
int err = 0, index, major = MAJOR(ptp_devt);
|
|
|
|
if (info->n_alarm > PTP_MAX_ALARMS)
|
|
return ERR_PTR(-EINVAL);
|
|
|
|
/* Initialize a clock structure. */
|
|
err = -ENOMEM;
|
|
ptp = kzalloc(sizeof(struct ptp_clock), GFP_KERNEL);
|
|
if (ptp == NULL)
|
|
goto no_memory;
|
|
|
|
index = ida_simple_get(&ptp_clocks_map, 0, MINORMASK + 1, GFP_KERNEL);
|
|
if (index < 0) {
|
|
err = index;
|
|
goto no_slot;
|
|
}
|
|
|
|
ptp->clock.ops = ptp_clock_ops;
|
|
ptp->info = info;
|
|
ptp->devid = MKDEV(major, index);
|
|
ptp->index = index;
|
|
spin_lock_init(&ptp->tsevq.lock);
|
|
mutex_init(&ptp->tsevq_mux);
|
|
mutex_init(&ptp->pincfg_mux);
|
|
init_waitqueue_head(&ptp->tsev_wq);
|
|
|
|
if (ptp->info->do_aux_work) {
|
|
kthread_init_delayed_work(&ptp->aux_work, ptp_aux_kworker);
|
|
ptp->kworker = kthread_create_worker(0, "ptp%d", ptp->index);
|
|
if (IS_ERR(ptp->kworker)) {
|
|
err = PTR_ERR(ptp->kworker);
|
|
pr_err("failed to create ptp aux_worker %d\n", err);
|
|
goto kworker_err;
|
|
}
|
|
}
|
|
|
|
err = ptp_populate_pin_groups(ptp);
|
|
if (err)
|
|
goto no_pin_groups;
|
|
|
|
/* Register a new PPS source. */
|
|
if (info->pps) {
|
|
struct pps_source_info pps;
|
|
memset(&pps, 0, sizeof(pps));
|
|
snprintf(pps.name, PPS_MAX_NAME_LEN, "ptp%d", index);
|
|
pps.mode = PTP_PPS_MODE;
|
|
pps.owner = info->owner;
|
|
ptp->pps_source = pps_register_source(&pps, PTP_PPS_DEFAULTS);
|
|
if (IS_ERR(ptp->pps_source)) {
|
|
err = PTR_ERR(ptp->pps_source);
|
|
pr_err("failed to register pps source\n");
|
|
goto no_pps;
|
|
}
|
|
}
|
|
|
|
/* Initialize a new device of our class in our clock structure. */
|
|
device_initialize(&ptp->dev);
|
|
ptp->dev.devt = ptp->devid;
|
|
ptp->dev.class = ptp_class;
|
|
ptp->dev.parent = parent;
|
|
ptp->dev.groups = ptp->pin_attr_groups;
|
|
ptp->dev.release = ptp_clock_release;
|
|
dev_set_drvdata(&ptp->dev, ptp);
|
|
dev_set_name(&ptp->dev, "ptp%d", ptp->index);
|
|
|
|
/* Create a posix clock and link it to the device. */
|
|
err = posix_clock_register(&ptp->clock, &ptp->dev);
|
|
if (err) {
|
|
pr_err("failed to create posix clock\n");
|
|
goto no_clock;
|
|
}
|
|
|
|
return ptp;
|
|
|
|
no_clock:
|
|
if (ptp->pps_source)
|
|
pps_unregister_source(ptp->pps_source);
|
|
no_pps:
|
|
ptp_cleanup_pin_groups(ptp);
|
|
no_pin_groups:
|
|
if (ptp->kworker)
|
|
kthread_destroy_worker(ptp->kworker);
|
|
kworker_err:
|
|
mutex_destroy(&ptp->tsevq_mux);
|
|
mutex_destroy(&ptp->pincfg_mux);
|
|
ida_simple_remove(&ptp_clocks_map, index);
|
|
no_slot:
|
|
kfree(ptp);
|
|
no_memory:
|
|
return ERR_PTR(err);
|
|
}
|
|
EXPORT_SYMBOL(ptp_clock_register);
|
|
|
|
int ptp_clock_unregister(struct ptp_clock *ptp)
|
|
{
|
|
ptp->defunct = 1;
|
|
wake_up_interruptible(&ptp->tsev_wq);
|
|
|
|
if (ptp->kworker) {
|
|
kthread_cancel_delayed_work_sync(&ptp->aux_work);
|
|
kthread_destroy_worker(ptp->kworker);
|
|
}
|
|
|
|
/* Release the clock's resources. */
|
|
if (ptp->pps_source)
|
|
pps_unregister_source(ptp->pps_source);
|
|
|
|
ptp_cleanup_pin_groups(ptp);
|
|
|
|
posix_clock_unregister(&ptp->clock);
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL(ptp_clock_unregister);
|
|
|
|
void ptp_clock_event(struct ptp_clock *ptp, struct ptp_clock_event *event)
|
|
{
|
|
struct pps_event_time evt;
|
|
|
|
switch (event->type) {
|
|
|
|
case PTP_CLOCK_ALARM:
|
|
break;
|
|
|
|
case PTP_CLOCK_EXTTS:
|
|
enqueue_external_timestamp(&ptp->tsevq, event);
|
|
wake_up_interruptible(&ptp->tsev_wq);
|
|
break;
|
|
|
|
case PTP_CLOCK_PPS:
|
|
pps_get_ts(&evt);
|
|
pps_event(ptp->pps_source, &evt, PTP_PPS_EVENT, NULL);
|
|
break;
|
|
|
|
case PTP_CLOCK_PPSUSR:
|
|
pps_event(ptp->pps_source, &event->pps_times,
|
|
PTP_PPS_EVENT, NULL);
|
|
break;
|
|
}
|
|
}
|
|
EXPORT_SYMBOL(ptp_clock_event);
|
|
|
|
int ptp_clock_index(struct ptp_clock *ptp)
|
|
{
|
|
return ptp->index;
|
|
}
|
|
EXPORT_SYMBOL(ptp_clock_index);
|
|
|
|
int ptp_find_pin(struct ptp_clock *ptp,
|
|
enum ptp_pin_function func, unsigned int chan)
|
|
{
|
|
struct ptp_pin_desc *pin = NULL;
|
|
int i;
|
|
|
|
mutex_lock(&ptp->pincfg_mux);
|
|
for (i = 0; i < ptp->info->n_pins; i++) {
|
|
if (ptp->info->pin_config[i].func == func &&
|
|
ptp->info->pin_config[i].chan == chan) {
|
|
pin = &ptp->info->pin_config[i];
|
|
break;
|
|
}
|
|
}
|
|
mutex_unlock(&ptp->pincfg_mux);
|
|
|
|
return pin ? i : -1;
|
|
}
|
|
EXPORT_SYMBOL(ptp_find_pin);
|
|
|
|
int ptp_schedule_worker(struct ptp_clock *ptp, unsigned long delay)
|
|
{
|
|
return kthread_mod_delayed_work(ptp->kworker, &ptp->aux_work, delay);
|
|
}
|
|
EXPORT_SYMBOL(ptp_schedule_worker);
|
|
|
|
/* module operations */
|
|
|
|
/*
 * Module exit: destroy the "ptp" class, give back the character device
 * region reserved in ptp_init() and release the index allocator.
 */
static void __exit ptp_exit(void)
{
	class_destroy(ptp_class);

	unregister_chrdev_region(ptp_devt, MINORMASK + 1);

	ida_destroy(&ptp_clocks_map);
}
|
|
|
|
/*
 * Module init: create the "ptp" class and reserve MINORMASK + 1 character
 * device minors for it. If the region cannot be allocated the class is
 * destroyed again. Returns 0 on success or a negative error code.
 */
static int __init ptp_init(void)
{
	int err;

	ptp_class = class_create(THIS_MODULE, "ptp");
	if (IS_ERR(ptp_class)) {
		pr_err("ptp: failed to allocate class\n");
		return PTR_ERR(ptp_class);
	}

	err = alloc_chrdev_region(&ptp_devt, 0, MINORMASK + 1, "ptp");
	if (err < 0) {
		pr_err("ptp: failed to allocate device region\n");
		class_destroy(ptp_class);
		return err;
	}

	ptp_class->dev_groups = ptp_groups;
	pr_info("PTP clock support registered\n");

	return 0;
}
|
|
|
|
subsys_initcall(ptp_init);
|
|
module_exit(ptp_exit);
|
|
|
|
MODULE_AUTHOR("Richard Cochran <richardcochran@gmail.com>");
|
|
MODULE_DESCRIPTION("PTP clocks support");
|
|
MODULE_LICENSE("GPL");
|