linux_dsm_epyc7002/kernel/cpu_pm.c
Alex Shi 313c8c16ee PM / CPU: replace raw_notifier with atomic_notifier
This patch replaces an rwlock and raw notifier by an atomic notifier
protected by a spin_lock and RCU.

The main reason for this change is a 'scheduling while atomic' bug
seen with RT kernels on ARM/ARM64. On ARM/ARM64, taking the rwlock
cpu_pm_notifier_lock in cpu_pm_enter/exit() can cause a schedule
after IRQs have been disabled in the idle call chain:

cpu_startup_entry
  cpu_idle_loop
    local_irq_disable()
    cpuidle_idle_call
      call_cpuidle
        cpuidle_enter
          cpuidle_enter_state
            ->enter :arm_enter_idle_state
              cpu_pm_enter/exit
                CPU_PM_CPU_IDLE_ENTER
                  read_lock(&cpu_pm_notifier_lock); <-- sleep in idle
                     __rt_spin_lock();
                        schedule();

The kernel panic is here:
[    4.609601] BUG: scheduling while atomic: swapper/1/0/0x00000002
[    4.609608] [<ffff0000086fae70>] arm_enter_idle_state+0x18/0x70
[    4.609614] Modules linked in:
[    4.609615] [<ffff0000086f9298>] cpuidle_enter_state+0xf0/0x218
[    4.609620] [<ffff0000086f93f8>] cpuidle_enter+0x18/0x20
[    4.609626] Preemption disabled at:
[    4.609627] [<ffff0000080fa234>] call_cpuidle+0x24/0x40
[    4.609635] [<ffff000008882fa4>] schedule_preempt_disabled+0x1c/0x28
[    4.609639] [<ffff0000080fa49c>] cpu_startup_entry+0x154/0x1f8
[    4.609645] [<ffff00000808e004>] secondary_start_kernel+0x15c/0x1a0

Daniel Lezcano said this notification is needed on arm/arm64 platforms.
Sebastian suggested using atomic_notifier instead of rwlock, which is not
only removing the sleeping in idle, but also improving latency.

Tony Lindgren found a misuse: rcu_read_lock was used after rcu_idle_enter.
Paul McKenney suggested trying RCU_NONIDLE.

Signed-off-by: Alex Shi <alex.shi@linaro.org>
Tested-by: Tony Lindgren <tony@atomide.com>
Acked-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
[ rjw: Subject & changelog ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2017-07-31 13:09:49 +02:00

210 lines
6.0 KiB
C

/*
* Copyright (C) 2011 Google, Inc.
*
* Author:
* Colin Cross <ccross@android.com>
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#include <linux/kernel.h>
#include <linux/cpu_pm.h>
#include <linux/module.h>
#include <linux/notifier.h>
#include <linux/spinlock.h>
#include <linux/syscore_ops.h>
/* Atomic notifier chain holding the blocks that receive CPU PM events. */
static ATOMIC_NOTIFIER_HEAD(cpu_pm_notifier_chain);
/*
 * Run cpu_pm_notifier_chain for @event.
 *
 * @nr_to_call and @nr_calls are handed straight through to
 * __atomic_notifier_call_chain(). The chain's result is converted with
 * notifier_to_errno() before it is returned.
 */
static int cpu_pm_notify(enum cpu_pm_event event, int nr_to_call, int *nr_calls)
{
	int chain_ret;

	/*
	 * __atomic_notifier_call_chain() contains an RCU read-side critical
	 * section, which may be dysfunctional while the CPU is idle. Bracket
	 * the call the same way RCU_NONIDLE does so that RCU is aware of it.
	 */
	rcu_irq_enter_irqson();
	chain_ret = __atomic_notifier_call_chain(&cpu_pm_notifier_chain, event,
						 NULL, nr_to_call, nr_calls);
	rcu_irq_exit_irqson();

	return notifier_to_errno(chain_ret);
}
/**
 * cpu_pm_register_notifier - register a driver with cpu_pm
 * @nb: notifier block to register
 *
 * Add a driver to the list of drivers that are notified about
 * CPU and CPU cluster low power entry and exit. The block is added to
 * cpu_pm_notifier_chain via atomic_notifier_chain_register().
 *
 * NOTE(review): calling-context rules (sleeping or not) are those of
 * atomic_notifier_chain_register(); confirm before calling from atomic
 * context.
 *
 * Return: same return conditions as atomic_notifier_chain_register().
 */
int cpu_pm_register_notifier(struct notifier_block *nb)
{
return atomic_notifier_chain_register(&cpu_pm_notifier_chain, nb);
}
EXPORT_SYMBOL_GPL(cpu_pm_register_notifier);
/**
 * cpu_pm_unregister_notifier - unregister a driver with cpu_pm
 * @nb: notifier block to be unregistered
 *
 * Remove a driver from the CPU PM notifier list. The block is removed
 * from cpu_pm_notifier_chain via atomic_notifier_chain_unregister().
 *
 * NOTE(review): presumably this may sleep, since unregistering from an
 * atomic notifier chain has to wait for concurrent RCU readers; verify
 * against atomic_notifier_chain_unregister().
 *
 * Return: same return conditions as atomic_notifier_chain_unregister().
 */
int cpu_pm_unregister_notifier(struct notifier_block *nb)
{
return atomic_notifier_chain_unregister(&cpu_pm_notifier_chain, nb);
}
EXPORT_SYMBOL_GPL(cpu_pm_unregister_notifier);
/**
 * cpu_pm_enter - CPU low power entry notifier
 *
 * Notifies listeners that a single CPU is entering a low power state that may
 * cause some blocks in the same power domain as the cpu to reset.
 *
 * Must be called on the affected CPU with interrupts disabled. Platform is
 * responsible for ensuring that cpu_pm_enter is not called twice on the same
 * CPU before cpu_pm_exit is called. Notified drivers can include VFP
 * co-processor, interrupt controller and its PM extensions, local CPU
 * timers context save/restore which shouldn't be interrupted. Hence it
 * must be called with interrupts disabled.
 *
 * Return: the result of cpu_pm_notify() for CPU_PM_ENTER, i.e. the notifier
 * chain result converted by notifier_to_errno(). A non-zero value means a
 * listener refused entry; in that case CPU_PM_ENTER_FAILED has already been
 * sent to the listeners that were notified before the failing one.
 */
int cpu_pm_enter(void)
{
	/*
	 * Must start at zero: the notifier call chain only increments the
	 * counter, it never resets it. An uninitialised value here would make
	 * the rollback below notify an arbitrary number of listeners.
	 */
	int nr_calls = 0;
	int ret;

	ret = cpu_pm_notify(CPU_PM_ENTER, -1, &nr_calls);
	if (ret) {
		/*
		 * Inform the (nr_calls - 1) listeners that were notified
		 * earlier, and so prepared for entry, that CPU PM entry
		 * failed. The listener that failed is not notified again.
		 */
		cpu_pm_notify(CPU_PM_ENTER_FAILED, nr_calls - 1, NULL);
	}

	return ret;
}
EXPORT_SYMBOL_GPL(cpu_pm_enter);
/**
 * cpu_pm_exit - CPU low power exit notifier
 *
 * Notifies listeners that a single CPU is exiting a low power state that may
 * have caused some blocks in the same power domain as the cpu to reset.
 *
 * Notified drivers can include VFP co-processor, interrupt controller
 * and its PM extensions, local CPU timers context save/restore which
 * shouldn't be interrupted. Hence it must be called with interrupts disabled.
 *
 * Every registered listener is offered the event (nr_to_call is -1) and no
 * call count is collected.
 *
 * Return: the result of cpu_pm_notify() for CPU_PM_EXIT, i.e. the notifier
 * chain result converted by notifier_to_errno().
 */
int cpu_pm_exit(void)
{
return cpu_pm_notify(CPU_PM_EXIT, -1, NULL);
}
EXPORT_SYMBOL_GPL(cpu_pm_exit);
/**
 * cpu_cluster_pm_enter - CPU cluster low power entry notifier
 *
 * Notifies listeners that all cpus in a power domain are entering a low power
 * state that may cause some blocks in the same power domain to reset.
 *
 * Must be called after cpu_pm_enter has been called on all cpus in the power
 * domain, and before cpu_pm_exit has been called on any cpu in the power
 * domain. Notified drivers can include VFP co-processor, interrupt controller
 * and its PM extensions, local CPU timers context save/restore which
 * shouldn't be interrupted. Hence it must be called with interrupts disabled.
 *
 * Return: the result of cpu_pm_notify() for CPU_CLUSTER_PM_ENTER, i.e. the
 * notifier chain result converted by notifier_to_errno(). A non-zero value
 * means a listener refused entry; in that case CPU_CLUSTER_PM_ENTER_FAILED
 * has already been sent to the listeners that were notified before the
 * failing one.
 */
int cpu_cluster_pm_enter(void)
{
	/*
	 * Must start at zero: the notifier call chain only increments the
	 * counter, it never resets it. An uninitialised value here would make
	 * the rollback below notify an arbitrary number of listeners.
	 */
	int nr_calls = 0;
	int ret;

	ret = cpu_pm_notify(CPU_CLUSTER_PM_ENTER, -1, &nr_calls);
	if (ret) {
		/*
		 * Inform the (nr_calls - 1) listeners that were notified
		 * earlier, and so prepared for entry, that CPU cluster PM
		 * entry failed. The listener that failed is not notified
		 * again.
		 */
		cpu_pm_notify(CPU_CLUSTER_PM_ENTER_FAILED, nr_calls - 1, NULL);
	}

	return ret;
}
EXPORT_SYMBOL_GPL(cpu_cluster_pm_enter);
/**
 * cpu_cluster_pm_exit - CPU cluster low power exit notifier
 *
 * Notifies listeners that all cpus in a power domain are exiting from a
 * low power state that may have caused some blocks in the same power domain
 * to reset.
 *
 * Must be called after cpu_cluster_pm_enter has been called for the power
 * domain, and before cpu_pm_exit has been called on any cpu in the power
 * domain. Notified drivers can include VFP co-processor, interrupt controller
 * and its PM extensions, local CPU timers context save/restore which
 * shouldn't be interrupted. Hence it must be called with interrupts disabled.
 *
 * Every registered listener is offered the event (nr_to_call is -1) and no
 * call count is collected.
 *
 * Return: the result of cpu_pm_notify() for CPU_CLUSTER_PM_EXIT, i.e. the
 * notifier chain result converted by notifier_to_errno().
 */
int cpu_cluster_pm_exit(void)
{
return cpu_pm_notify(CPU_CLUSTER_PM_EXIT, -1, NULL);
}
EXPORT_SYMBOL_GPL(cpu_cluster_pm_exit);
#ifdef CONFIG_PM
/*
 * Syscore suspend callback: send the CPU entry notification and, only if
 * that succeeds, the cluster entry notification.
 *
 * Returns the first non-zero result, or 0 when both notifications succeed.
 *
 * NOTE(review): when cpu_cluster_pm_enter() fails, its error is returned
 * without a matching cpu_pm_exit(); confirm that this is intended.
 */
static int cpu_pm_suspend(void)
{
	int err = cpu_pm_enter();

	if (!err)
		err = cpu_cluster_pm_enter();

	return err;
}
/*
 * Syscore resume callback: send the exit notifications in the reverse
 * order of cpu_pm_suspend(), cluster first and then CPU. The return values
 * of both calls are ignored.
 */
static void cpu_pm_resume(void)
{
cpu_cluster_pm_exit();
cpu_pm_exit();
}
/* syscore_ops wiring cpu_pm_suspend()/cpu_pm_resume() as .suspend/.resume. */
static struct syscore_ops cpu_pm_syscore_ops = {
.suspend = cpu_pm_suspend,
.resume = cpu_pm_resume,
};
/*
 * Register cpu_pm_syscore_ops with the syscore framework. Runs as a
 * core_initcall and always returns 0.
 */
static int cpu_pm_init(void)
{
register_syscore_ops(&cpu_pm_syscore_ops);
return 0;
}
core_initcall(cpu_pm_init);
#endif