linux_dsm_epyc7002/drivers/leds/led-triggers.c
Andrea Righi 736924801c leds: trigger: fix potential deadlock with libata
commit 27af8e2c90fba242460b01fa020e6e19ed68c495 upstream.

We have the following potential deadlock condition:

 ========================================================
 WARNING: possible irq lock inversion dependency detected
 5.10.0-rc2+ #25 Not tainted
 --------------------------------------------------------
 swapper/3/0 just changed the state of lock:
 ffff8880063bd618 (&host->lock){-...}-{2:2}, at: ata_bmdma_interrupt+0x27/0x200
 but this lock took another, HARDIRQ-READ-unsafe lock in the past:
  (&trig->leddev_list_lock){.+.?}-{2:2}

 and interrupts could create inverse lock ordering between them.

 other info that might help us debug this:
  Possible interrupt unsafe locking scenario:

        CPU0                    CPU1
        ----                    ----
   lock(&trig->leddev_list_lock);
                                local_irq_disable();
                                lock(&host->lock);
                                lock(&trig->leddev_list_lock);
   <Interrupt>
     lock(&host->lock);

  *** DEADLOCK ***

 no locks held by swapper/3/0.

 the shortest dependencies between 2nd lock and 1st lock:
  -> (&trig->leddev_list_lock){.+.?}-{2:2} ops: 46 {
     HARDIRQ-ON-R at:
                       lock_acquire+0x15f/0x420
                       _raw_read_lock+0x42/0x90
                       led_trigger_event+0x2b/0x70
                       rfkill_global_led_trigger_worker+0x94/0xb0
                       process_one_work+0x240/0x560
                       worker_thread+0x58/0x3d0
                       kthread+0x151/0x170
                       ret_from_fork+0x1f/0x30
     IN-SOFTIRQ-R at:
                       lock_acquire+0x15f/0x420
                       _raw_read_lock+0x42/0x90
                       led_trigger_event+0x2b/0x70
                       kbd_bh+0x9e/0xc0
                       tasklet_action_common.constprop.0+0xe9/0x100
                       tasklet_action+0x22/0x30
                       __do_softirq+0xcc/0x46d
                       run_ksoftirqd+0x3f/0x70
                       smpboot_thread_fn+0x116/0x1f0
                       kthread+0x151/0x170
                       ret_from_fork+0x1f/0x30
     SOFTIRQ-ON-R at:
                       lock_acquire+0x15f/0x420
                       _raw_read_lock+0x42/0x90
                       led_trigger_event+0x2b/0x70
                       rfkill_global_led_trigger_worker+0x94/0xb0
                       process_one_work+0x240/0x560
                       worker_thread+0x58/0x3d0
                       kthread+0x151/0x170
                       ret_from_fork+0x1f/0x30
     INITIAL READ USE at:
                           lock_acquire+0x15f/0x420
                           _raw_read_lock+0x42/0x90
                           led_trigger_event+0x2b/0x70
                           rfkill_global_led_trigger_worker+0x94/0xb0
                           process_one_work+0x240/0x560
                           worker_thread+0x58/0x3d0
                           kthread+0x151/0x170
                           ret_from_fork+0x1f/0x30
   }
   ... key      at: [<ffffffff83da4c00>] __key.0+0x0/0x10
   ... acquired at:
    _raw_read_lock+0x42/0x90
    led_trigger_blink_oneshot+0x3b/0x90
    ledtrig_disk_activity+0x3c/0xa0
    ata_qc_complete+0x26/0x450
    ata_do_link_abort+0xa3/0xe0
    ata_port_freeze+0x2e/0x40
    ata_hsm_qc_complete+0x94/0xa0
    ata_sff_hsm_move+0x177/0x7a0
    ata_sff_pio_task+0xc7/0x1b0
    process_one_work+0x240/0x560
    worker_thread+0x58/0x3d0
    kthread+0x151/0x170
    ret_from_fork+0x1f/0x30

 -> (&host->lock){-...}-{2:2} ops: 69 {
    IN-HARDIRQ-W at:
                     lock_acquire+0x15f/0x420
                     _raw_spin_lock_irqsave+0x52/0xa0
                     ata_bmdma_interrupt+0x27/0x200
                     __handle_irq_event_percpu+0xd5/0x2b0
                     handle_irq_event+0x57/0xb0
                     handle_edge_irq+0x8c/0x230
                     asm_call_irq_on_stack+0xf/0x20
                     common_interrupt+0x100/0x1c0
                     asm_common_interrupt+0x1e/0x40
                     native_safe_halt+0xe/0x10
                     arch_cpu_idle+0x15/0x20
                     default_idle_call+0x59/0x1c0
                     do_idle+0x22c/0x2c0
                     cpu_startup_entry+0x20/0x30
                     start_secondary+0x11d/0x150
                     secondary_startup_64_no_verify+0xa6/0xab
    INITIAL USE at:
                    lock_acquire+0x15f/0x420
                    _raw_spin_lock_irqsave+0x52/0xa0
                    ata_dev_init+0x54/0xe0
                    ata_link_init+0x8b/0xd0
                    ata_port_alloc+0x1f1/0x210
                    ata_host_alloc+0xf1/0x130
                    ata_host_alloc_pinfo+0x14/0xb0
                    ata_pci_sff_prepare_host+0x41/0xa0
                    ata_pci_bmdma_prepare_host+0x14/0x30
                    piix_init_one+0x21f/0x600
                    local_pci_probe+0x48/0x80
                    pci_device_probe+0x105/0x1c0
                    really_probe+0x221/0x490
                    driver_probe_device+0xe9/0x160
                    device_driver_attach+0xb2/0xc0
                    __driver_attach+0x91/0x150
                    bus_for_each_dev+0x81/0xc0
                    driver_attach+0x1e/0x20
                    bus_add_driver+0x138/0x1f0
                    driver_register+0x91/0xf0
                    __pci_register_driver+0x73/0x80
                    piix_init+0x1e/0x2e
                    do_one_initcall+0x5f/0x2d0
                    kernel_init_freeable+0x26f/0x2cf
                    kernel_init+0xe/0x113
                    ret_from_fork+0x1f/0x30
  }
  ... key      at: [<ffffffff83d9fdc0>] __key.6+0x0/0x10
  ... acquired at:
    __lock_acquire+0x9da/0x2370
    lock_acquire+0x15f/0x420
    _raw_spin_lock_irqsave+0x52/0xa0
    ata_bmdma_interrupt+0x27/0x200
    __handle_irq_event_percpu+0xd5/0x2b0
    handle_irq_event+0x57/0xb0
    handle_edge_irq+0x8c/0x230
    asm_call_irq_on_stack+0xf/0x20
    common_interrupt+0x100/0x1c0
    asm_common_interrupt+0x1e/0x40
    native_safe_halt+0xe/0x10
    arch_cpu_idle+0x15/0x20
    default_idle_call+0x59/0x1c0
    do_idle+0x22c/0x2c0
    cpu_startup_entry+0x20/0x30
    start_secondary+0x11d/0x150
    secondary_startup_64_no_verify+0xa6/0xab

This lockdep splat is reported after:
commit e918188611 ("locking: More accurate annotations for read_lock()")

To clarify:
 - read-locks are recursive only in interrupt context (when
   in_interrupt() returns true)
 - after acquiring host->lock in CPU1, another cpu (i.e. CPU2) may call
   write_lock(&trig->leddev_list_lock) that would be blocked by CPU0
   that holds trig->leddev_list_lock in read-mode
 - when CPU1 (ata_ac_complete()) tries to read-lock
   trig->leddev_list_lock, it would be blocked by the write-lock waiter
   on CPU2 (because we are not in interrupt context, so the read-lock is
   not recursive)
 - at this point if an interrupt happens on CPU0 and
   ata_bmdma_interrupt() is executed it will try to acquire host->lock,
   that is held by CPU1, that is currently blocked by CPU2, so:

   * CPU0 blocked by CPU1
   * CPU1 blocked by CPU2
   * CPU2 blocked by CPU0

     *** DEADLOCK ***

The deadlock scenario is better represented by the following schema
(thanks to Boqun Feng <boqun.feng@gmail.com> for the schema and the
detailed explanation of the deadlock condition):

 CPU 0:                          CPU 1:                        CPU 2:
 -----                           -----                         -----
 led_trigger_event():
   read_lock(&trig->leddev_list_lock);
 				<workqueue>
 				ata_hsm_qc_complete():
 				  spin_lock_irqsave(&host->lock);
 								write_lock(&trig->leddev_list_lock);
 				  ata_port_freeze():
 				    ata_do_link_abort():
 				      ata_qc_complete():
 					ledtrig_disk_activity():
 					  led_trigger_blink_oneshot():
 					    read_lock(&trig->leddev_list_lock);
 					    // ^ not in in_interrupt() context, so could get blocked by CPU 2
 <interrupt>
   ata_bmdma_interrupt():
     spin_lock_irqsave(&host->lock);

Fix by using read_lock_irqsave/irqrestore() in led_trigger_event(), so
that no interrupt can happen in between, preventing the deadlock
condition.

Apply the same change to led_trigger_blink_setup() as well, since the
same deadlock scenario can also happen in power_supply_update_bat_leds()
-> led_trigger_blink() -> led_trigger_blink_setup() (workqueue context),
and potentially prevent other similar usages.

Link: https://lore.kernel.org/lkml/20201101092614.GB3989@xps-13-7390/
Fixes: eb25cb9956 ("leds: convert IDE trigger to common disk trigger")
Signed-off-by: Andrea Righi <andrea.righi@canonical.com>
Signed-off-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2021-02-03 23:28:41 +01:00

464 lines
11 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/*
* LED Triggers Core
*
* Copyright 2005-2007 Openedhand Ltd.
*
* Author: Richard Purdie <rpurdie@openedhand.com>
*/
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/device.h>
#include <linux/timer.h>
#include <linux/rwsem.h>
#include <linux/leds.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include "leds.h"
/*
* Nests outside led_cdev->trigger_lock
*/
static DECLARE_RWSEM(triggers_list_lock);
LIST_HEAD(trigger_list);
/* Used by LED Class */
static inline bool
trigger_relevant(struct led_classdev *led_cdev, struct led_trigger *trig)
{
return !trig->trigger_type || trig->trigger_type == led_cdev->trigger_type;
}
ssize_t led_trigger_write(struct file *filp, struct kobject *kobj,
struct bin_attribute *bin_attr, char *buf,
loff_t pos, size_t count)
{
struct device *dev = kobj_to_dev(kobj);
struct led_classdev *led_cdev = dev_get_drvdata(dev);
struct led_trigger *trig;
int ret = count;
mutex_lock(&led_cdev->led_access);
if (led_sysfs_is_disabled(led_cdev)) {
ret = -EBUSY;
goto unlock;
}
if (sysfs_streq(buf, "none")) {
led_trigger_remove(led_cdev);
goto unlock;
}
down_read(&triggers_list_lock);
list_for_each_entry(trig, &trigger_list, next_trig) {
if (sysfs_streq(buf, trig->name) && trigger_relevant(led_cdev, trig)) {
down_write(&led_cdev->trigger_lock);
led_trigger_set(led_cdev, trig);
up_write(&led_cdev->trigger_lock);
up_read(&triggers_list_lock);
goto unlock;
}
}
/* we come here only if buf matches no trigger */
ret = -EINVAL;
up_read(&triggers_list_lock);
unlock:
mutex_unlock(&led_cdev->led_access);
return ret;
}
EXPORT_SYMBOL_GPL(led_trigger_write);
__printf(3, 4)
static int led_trigger_snprintf(char *buf, ssize_t size, const char *fmt, ...)
{
va_list args;
int i;
va_start(args, fmt);
if (size <= 0)
i = vsnprintf(NULL, 0, fmt, args);
else
i = vscnprintf(buf, size, fmt, args);
va_end(args);
return i;
}
static int led_trigger_format(char *buf, size_t size,
struct led_classdev *led_cdev)
{
struct led_trigger *trig;
int len = led_trigger_snprintf(buf, size, "%s",
led_cdev->trigger ? "none" : "[none]");
list_for_each_entry(trig, &trigger_list, next_trig) {
bool hit;
if (!trigger_relevant(led_cdev, trig))
continue;
hit = led_cdev->trigger && !strcmp(led_cdev->trigger->name, trig->name);
len += led_trigger_snprintf(buf + len, size - len,
" %s%s%s", hit ? "[" : "",
trig->name, hit ? "]" : "");
}
len += led_trigger_snprintf(buf + len, size - len, "\n");
return len;
}
/*
* It was stupid to create 10000 cpu triggers, but we are stuck with it now.
* Don't make that mistake again. We work around it here by creating binary
* attribute, which is not limited by length. This is _not_ good design, do not
* copy it.
*/
ssize_t led_trigger_read(struct file *filp, struct kobject *kobj,
struct bin_attribute *attr, char *buf,
loff_t pos, size_t count)
{
struct device *dev = kobj_to_dev(kobj);
struct led_classdev *led_cdev = dev_get_drvdata(dev);
void *data;
int len;
down_read(&triggers_list_lock);
down_read(&led_cdev->trigger_lock);
len = led_trigger_format(NULL, 0, led_cdev);
data = kvmalloc(len + 1, GFP_KERNEL);
if (!data) {
up_read(&led_cdev->trigger_lock);
up_read(&triggers_list_lock);
return -ENOMEM;
}
len = led_trigger_format(data, len + 1, led_cdev);
up_read(&led_cdev->trigger_lock);
up_read(&triggers_list_lock);
len = memory_read_from_buffer(buf, count, &pos, data, len);
kvfree(data);
return len;
}
EXPORT_SYMBOL_GPL(led_trigger_read);
/* Caller must ensure led_cdev->trigger_lock held */
int led_trigger_set(struct led_classdev *led_cdev, struct led_trigger *trig)
{
unsigned long flags;
char *event = NULL;
char *envp[2];
const char *name;
int ret;
if (!led_cdev->trigger && !trig)
return 0;
name = trig ? trig->name : "none";
event = kasprintf(GFP_KERNEL, "TRIGGER=%s", name);
/* Remove any existing trigger */
if (led_cdev->trigger) {
write_lock_irqsave(&led_cdev->trigger->leddev_list_lock, flags);
list_del(&led_cdev->trig_list);
write_unlock_irqrestore(&led_cdev->trigger->leddev_list_lock,
flags);
cancel_work_sync(&led_cdev->set_brightness_work);
led_stop_software_blink(led_cdev);
if (led_cdev->trigger->deactivate)
led_cdev->trigger->deactivate(led_cdev);
device_remove_groups(led_cdev->dev, led_cdev->trigger->groups);
led_cdev->trigger = NULL;
led_cdev->trigger_data = NULL;
led_cdev->activated = false;
led_set_brightness(led_cdev, LED_OFF);
}
if (trig) {
write_lock_irqsave(&trig->leddev_list_lock, flags);
list_add_tail(&led_cdev->trig_list, &trig->led_cdevs);
write_unlock_irqrestore(&trig->leddev_list_lock, flags);
led_cdev->trigger = trig;
if (trig->activate)
ret = trig->activate(led_cdev);
else
ret = 0;
if (ret)
goto err_activate;
ret = device_add_groups(led_cdev->dev, trig->groups);
if (ret) {
dev_err(led_cdev->dev, "Failed to add trigger attributes\n");
goto err_add_groups;
}
}
if (event) {
envp[0] = event;
envp[1] = NULL;
if (kobject_uevent_env(&led_cdev->dev->kobj, KOBJ_CHANGE, envp))
dev_err(led_cdev->dev,
"%s: Error sending uevent\n", __func__);
kfree(event);
}
return 0;
err_add_groups:
if (trig->deactivate)
trig->deactivate(led_cdev);
err_activate:
write_lock_irqsave(&led_cdev->trigger->leddev_list_lock, flags);
list_del(&led_cdev->trig_list);
write_unlock_irqrestore(&led_cdev->trigger->leddev_list_lock, flags);
led_cdev->trigger = NULL;
led_cdev->trigger_data = NULL;
led_set_brightness(led_cdev, LED_OFF);
kfree(event);
return ret;
}
EXPORT_SYMBOL_GPL(led_trigger_set);
void led_trigger_remove(struct led_classdev *led_cdev)
{
down_write(&led_cdev->trigger_lock);
led_trigger_set(led_cdev, NULL);
up_write(&led_cdev->trigger_lock);
}
EXPORT_SYMBOL_GPL(led_trigger_remove);
void led_trigger_set_default(struct led_classdev *led_cdev)
{
struct led_trigger *trig;
if (!led_cdev->default_trigger)
return;
down_read(&triggers_list_lock);
down_write(&led_cdev->trigger_lock);
list_for_each_entry(trig, &trigger_list, next_trig) {
if (!strcmp(led_cdev->default_trigger, trig->name) &&
trigger_relevant(led_cdev, trig)) {
led_cdev->flags |= LED_INIT_DEFAULT_TRIGGER;
led_trigger_set(led_cdev, trig);
break;
}
}
up_write(&led_cdev->trigger_lock);
up_read(&triggers_list_lock);
}
EXPORT_SYMBOL_GPL(led_trigger_set_default);
void led_trigger_rename_static(const char *name, struct led_trigger *trig)
{
/* new name must be on a temporary string to prevent races */
BUG_ON(name == trig->name);
down_write(&triggers_list_lock);
/* this assumes that trig->name was originaly allocated to
* non constant storage */
strcpy((char *)trig->name, name);
up_write(&triggers_list_lock);
}
EXPORT_SYMBOL_GPL(led_trigger_rename_static);
/* LED Trigger Interface */
int led_trigger_register(struct led_trigger *trig)
{
struct led_classdev *led_cdev;
struct led_trigger *_trig;
rwlock_init(&trig->leddev_list_lock);
INIT_LIST_HEAD(&trig->led_cdevs);
down_write(&triggers_list_lock);
/* Make sure the trigger's name isn't already in use */
list_for_each_entry(_trig, &trigger_list, next_trig) {
if (!strcmp(_trig->name, trig->name) &&
(trig->trigger_type == _trig->trigger_type ||
!trig->trigger_type || !_trig->trigger_type)) {
up_write(&triggers_list_lock);
return -EEXIST;
}
}
/* Add to the list of led triggers */
list_add_tail(&trig->next_trig, &trigger_list);
up_write(&triggers_list_lock);
/* Register with any LEDs that have this as a default trigger */
down_read(&leds_list_lock);
list_for_each_entry(led_cdev, &leds_list, node) {
down_write(&led_cdev->trigger_lock);
if (!led_cdev->trigger && led_cdev->default_trigger &&
!strcmp(led_cdev->default_trigger, trig->name) &&
trigger_relevant(led_cdev, trig)) {
led_cdev->flags |= LED_INIT_DEFAULT_TRIGGER;
led_trigger_set(led_cdev, trig);
}
up_write(&led_cdev->trigger_lock);
}
up_read(&leds_list_lock);
return 0;
}
EXPORT_SYMBOL_GPL(led_trigger_register);
void led_trigger_unregister(struct led_trigger *trig)
{
struct led_classdev *led_cdev;
if (list_empty_careful(&trig->next_trig))
return;
/* Remove from the list of led triggers */
down_write(&triggers_list_lock);
list_del_init(&trig->next_trig);
up_write(&triggers_list_lock);
/* Remove anyone actively using this trigger */
down_read(&leds_list_lock);
list_for_each_entry(led_cdev, &leds_list, node) {
down_write(&led_cdev->trigger_lock);
if (led_cdev->trigger == trig)
led_trigger_set(led_cdev, NULL);
up_write(&led_cdev->trigger_lock);
}
up_read(&leds_list_lock);
}
EXPORT_SYMBOL_GPL(led_trigger_unregister);
static void devm_led_trigger_release(struct device *dev, void *res)
{
led_trigger_unregister(*(struct led_trigger **)res);
}
int devm_led_trigger_register(struct device *dev,
struct led_trigger *trig)
{
struct led_trigger **dr;
int rc;
dr = devres_alloc(devm_led_trigger_release, sizeof(*dr),
GFP_KERNEL);
if (!dr)
return -ENOMEM;
*dr = trig;
rc = led_trigger_register(trig);
if (rc)
devres_free(dr);
else
devres_add(dev, dr);
return rc;
}
EXPORT_SYMBOL_GPL(devm_led_trigger_register);
/* Simple LED Trigger Interface */
void led_trigger_event(struct led_trigger *trig,
enum led_brightness brightness)
{
struct led_classdev *led_cdev;
unsigned long flags;
if (!trig)
return;
read_lock_irqsave(&trig->leddev_list_lock, flags);
list_for_each_entry(led_cdev, &trig->led_cdevs, trig_list)
led_set_brightness(led_cdev, brightness);
read_unlock_irqrestore(&trig->leddev_list_lock, flags);
}
EXPORT_SYMBOL_GPL(led_trigger_event);
static void led_trigger_blink_setup(struct led_trigger *trig,
unsigned long *delay_on,
unsigned long *delay_off,
int oneshot,
int invert)
{
struct led_classdev *led_cdev;
unsigned long flags;
if (!trig)
return;
read_lock_irqsave(&trig->leddev_list_lock, flags);
list_for_each_entry(led_cdev, &trig->led_cdevs, trig_list) {
if (oneshot)
led_blink_set_oneshot(led_cdev, delay_on, delay_off,
invert);
else
led_blink_set(led_cdev, delay_on, delay_off);
}
read_unlock_irqrestore(&trig->leddev_list_lock, flags);
}
void led_trigger_blink(struct led_trigger *trig,
unsigned long *delay_on,
unsigned long *delay_off)
{
led_trigger_blink_setup(trig, delay_on, delay_off, 0, 0);
}
EXPORT_SYMBOL_GPL(led_trigger_blink);
void led_trigger_blink_oneshot(struct led_trigger *trig,
unsigned long *delay_on,
unsigned long *delay_off,
int invert)
{
led_trigger_blink_setup(trig, delay_on, delay_off, 1, invert);
}
EXPORT_SYMBOL_GPL(led_trigger_blink_oneshot);
void led_trigger_register_simple(const char *name, struct led_trigger **tp)
{
struct led_trigger *trig;
int err;
trig = kzalloc(sizeof(struct led_trigger), GFP_KERNEL);
if (trig) {
trig->name = name;
err = led_trigger_register(trig);
if (err < 0) {
kfree(trig);
trig = NULL;
pr_warn("LED trigger %s failed to register (%d)\n",
name, err);
}
} else {
pr_warn("LED trigger %s failed to register (no memory)\n",
name);
}
*tp = trig;
}
EXPORT_SYMBOL_GPL(led_trigger_register_simple);
void led_trigger_unregister_simple(struct led_trigger *trig)
{
if (trig)
led_trigger_unregister(trig);
kfree(trig);
}
EXPORT_SYMBOL_GPL(led_trigger_unregister_simple);