linux_dsm_epyc7002/kernel/power/suspend.c
Len Brown 2fd77fff4b PM / suspend: make sync() on suspend-to-RAM build-time optional
The Linux kernel suspend path has traditionally invoked sys_sync()
before freezing user threads.

But sys_sync() can be expensive, and some user-space OS's do not want
the kernel to pay the cost of sys_sync() on every suspend -- preferring
invoke sync() from user-space if/when they want it.

So make sys_sync on suspend build-time optional.

The default is unchanged.

Signed-off-by: Len Brown <len.brown@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2015-07-31 23:46:05 +02:00

541 lines
13 KiB
C

/*
* kernel/power/suspend.c - Suspend to RAM and standby functionality.
*
* Copyright (c) 2003 Patrick Mochel
* Copyright (c) 2003 Open Source Development Lab
* Copyright (c) 2009 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
*
* This file is released under the GPLv2.
*/
#include <linux/string.h>
#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/console.h>
#include <linux/cpu.h>
#include <linux/cpuidle.h>
#include <linux/syscalls.h>
#include <linux/gfp.h>
#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/suspend.h>
#include <linux/syscore_ops.h>
#include <linux/ftrace.h>
#include <trace/events/power.h>
#include <linux/compiler.h>
#include <linux/moduleparam.h>
#include "power.h"
const char *pm_labels[] = { "mem", "standby", "freeze", NULL };
const char *pm_states[PM_SUSPEND_MAX];
static const struct platform_suspend_ops *suspend_ops;
static const struct platform_freeze_ops *freeze_ops;
static DECLARE_WAIT_QUEUE_HEAD(suspend_freeze_wait_head);
enum freeze_state __read_mostly suspend_freeze_state;
static DEFINE_SPINLOCK(suspend_freeze_lock);
void freeze_set_ops(const struct platform_freeze_ops *ops)
{
lock_system_sleep();
freeze_ops = ops;
unlock_system_sleep();
}
static void freeze_begin(void)
{
suspend_freeze_state = FREEZE_STATE_NONE;
}
static void freeze_enter(void)
{
spin_lock_irq(&suspend_freeze_lock);
if (pm_wakeup_pending())
goto out;
suspend_freeze_state = FREEZE_STATE_ENTER;
spin_unlock_irq(&suspend_freeze_lock);
get_online_cpus();
cpuidle_resume();
/* Push all the CPUs into the idle loop. */
wake_up_all_idle_cpus();
pr_debug("PM: suspend-to-idle\n");
/* Make the current CPU wait so it can enter the idle loop too. */
wait_event(suspend_freeze_wait_head,
suspend_freeze_state == FREEZE_STATE_WAKE);
pr_debug("PM: resume from suspend-to-idle\n");
cpuidle_pause();
put_online_cpus();
spin_lock_irq(&suspend_freeze_lock);
out:
suspend_freeze_state = FREEZE_STATE_NONE;
spin_unlock_irq(&suspend_freeze_lock);
}
void freeze_wake(void)
{
unsigned long flags;
spin_lock_irqsave(&suspend_freeze_lock, flags);
if (suspend_freeze_state > FREEZE_STATE_NONE) {
suspend_freeze_state = FREEZE_STATE_WAKE;
wake_up(&suspend_freeze_wait_head);
}
spin_unlock_irqrestore(&suspend_freeze_lock, flags);
}
EXPORT_SYMBOL_GPL(freeze_wake);
static bool valid_state(suspend_state_t state)
{
/*
* PM_SUSPEND_STANDBY and PM_SUSPEND_MEM states need low level
* support and need to be valid to the low level
* implementation, no valid callback implies that none are valid.
*/
return suspend_ops && suspend_ops->valid && suspend_ops->valid(state);
}
/*
* If this is set, the "mem" label always corresponds to the deepest sleep state
* available, the "standby" label corresponds to the second deepest sleep state
* available (if any), and the "freeze" label corresponds to the remaining
* available sleep state (if there is one).
*/
static bool relative_states;
static int __init sleep_states_setup(char *str)
{
relative_states = !strncmp(str, "1", 1);
pm_states[PM_SUSPEND_FREEZE] = pm_labels[relative_states ? 0 : 2];
return 1;
}
__setup("relative_sleep_states=", sleep_states_setup);
/**
* suspend_set_ops - Set the global suspend method table.
* @ops: Suspend operations to use.
*/
void suspend_set_ops(const struct platform_suspend_ops *ops)
{
suspend_state_t i;
int j = 0;
lock_system_sleep();
suspend_ops = ops;
for (i = PM_SUSPEND_MEM; i >= PM_SUSPEND_STANDBY; i--)
if (valid_state(i)) {
pm_states[i] = pm_labels[j++];
} else if (!relative_states) {
pm_states[i] = NULL;
j++;
}
pm_states[PM_SUSPEND_FREEZE] = pm_labels[j];
unlock_system_sleep();
}
EXPORT_SYMBOL_GPL(suspend_set_ops);
/**
* suspend_valid_only_mem - Generic memory-only valid callback.
*
* Platform drivers that implement mem suspend only and only need to check for
* that in their .valid() callback can use this instead of rolling their own
* .valid() callback.
*/
int suspend_valid_only_mem(suspend_state_t state)
{
return state == PM_SUSPEND_MEM;
}
EXPORT_SYMBOL_GPL(suspend_valid_only_mem);
static bool sleep_state_supported(suspend_state_t state)
{
return state == PM_SUSPEND_FREEZE || (suspend_ops && suspend_ops->enter);
}
static int platform_suspend_prepare(suspend_state_t state)
{
return state != PM_SUSPEND_FREEZE && suspend_ops->prepare ?
suspend_ops->prepare() : 0;
}
static int platform_suspend_prepare_late(suspend_state_t state)
{
return state == PM_SUSPEND_FREEZE && freeze_ops && freeze_ops->prepare ?
freeze_ops->prepare() : 0;
}
static int platform_suspend_prepare_noirq(suspend_state_t state)
{
return state != PM_SUSPEND_FREEZE && suspend_ops->prepare_late ?
suspend_ops->prepare_late() : 0;
}
static void platform_resume_noirq(suspend_state_t state)
{
if (state != PM_SUSPEND_FREEZE && suspend_ops->wake)
suspend_ops->wake();
}
static void platform_resume_early(suspend_state_t state)
{
if (state == PM_SUSPEND_FREEZE && freeze_ops && freeze_ops->restore)
freeze_ops->restore();
}
static void platform_resume_finish(suspend_state_t state)
{
if (state != PM_SUSPEND_FREEZE && suspend_ops->finish)
suspend_ops->finish();
}
static int platform_suspend_begin(suspend_state_t state)
{
if (state == PM_SUSPEND_FREEZE && freeze_ops && freeze_ops->begin)
return freeze_ops->begin();
else if (suspend_ops->begin)
return suspend_ops->begin(state);
else
return 0;
}
static void platform_resume_end(suspend_state_t state)
{
if (state == PM_SUSPEND_FREEZE && freeze_ops && freeze_ops->end)
freeze_ops->end();
else if (suspend_ops->end)
suspend_ops->end();
}
static void platform_recover(suspend_state_t state)
{
if (state != PM_SUSPEND_FREEZE && suspend_ops->recover)
suspend_ops->recover();
}
static bool platform_suspend_again(suspend_state_t state)
{
return state != PM_SUSPEND_FREEZE && suspend_ops->suspend_again ?
suspend_ops->suspend_again() : false;
}
#ifdef CONFIG_PM_DEBUG
static unsigned int pm_test_delay = 5;
module_param(pm_test_delay, uint, 0644);
MODULE_PARM_DESC(pm_test_delay,
"Number of seconds to wait before resuming from suspend test");
#endif
static int suspend_test(int level)
{
#ifdef CONFIG_PM_DEBUG
if (pm_test_level == level) {
printk(KERN_INFO "suspend debug: Waiting for %d second(s).\n",
pm_test_delay);
mdelay(pm_test_delay * 1000);
return 1;
}
#endif /* !CONFIG_PM_DEBUG */
return 0;
}
/**
* suspend_prepare - Prepare for entering system sleep state.
*
* Common code run for every system sleep state that can be entered (except for
* hibernation). Run suspend notifiers, allocate the "suspend" console and
* freeze processes.
*/
static int suspend_prepare(suspend_state_t state)
{
int error;
if (!sleep_state_supported(state))
return -EPERM;
pm_prepare_console();
error = pm_notifier_call_chain(PM_SUSPEND_PREPARE);
if (error)
goto Finish;
trace_suspend_resume(TPS("freeze_processes"), 0, true);
error = suspend_freeze_processes();
trace_suspend_resume(TPS("freeze_processes"), 0, false);
if (!error)
return 0;
suspend_stats.failed_freeze++;
dpm_save_failed_step(SUSPEND_FREEZE);
Finish:
pm_notifier_call_chain(PM_POST_SUSPEND);
pm_restore_console();
return error;
}
/* default implementation */
void __weak arch_suspend_disable_irqs(void)
{
local_irq_disable();
}
/* default implementation */
void __weak arch_suspend_enable_irqs(void)
{
local_irq_enable();
}
/**
* suspend_enter - Make the system enter the given sleep state.
* @state: System sleep state to enter.
* @wakeup: Returns information that the sleep state should not be re-entered.
*
* This function should be called after devices have been suspended.
*/
static int suspend_enter(suspend_state_t state, bool *wakeup)
{
int error;
error = platform_suspend_prepare(state);
if (error)
goto Platform_finish;
error = dpm_suspend_late(PMSG_SUSPEND);
if (error) {
printk(KERN_ERR "PM: late suspend of devices failed\n");
goto Platform_finish;
}
error = platform_suspend_prepare_late(state);
if (error)
goto Devices_early_resume;
error = dpm_suspend_noirq(PMSG_SUSPEND);
if (error) {
printk(KERN_ERR "PM: noirq suspend of devices failed\n");
goto Platform_early_resume;
}
error = platform_suspend_prepare_noirq(state);
if (error)
goto Platform_wake;
if (suspend_test(TEST_PLATFORM))
goto Platform_wake;
/*
* PM_SUSPEND_FREEZE equals
* frozen processes + suspended devices + idle processors.
* Thus we should invoke freeze_enter() soon after
* all the devices are suspended.
*/
if (state == PM_SUSPEND_FREEZE) {
trace_suspend_resume(TPS("machine_suspend"), state, true);
freeze_enter();
trace_suspend_resume(TPS("machine_suspend"), state, false);
goto Platform_wake;
}
error = disable_nonboot_cpus();
if (error || suspend_test(TEST_CPUS))
goto Enable_cpus;
arch_suspend_disable_irqs();
BUG_ON(!irqs_disabled());
error = syscore_suspend();
if (!error) {
*wakeup = pm_wakeup_pending();
if (!(suspend_test(TEST_CORE) || *wakeup)) {
trace_suspend_resume(TPS("machine_suspend"),
state, true);
error = suspend_ops->enter(state);
trace_suspend_resume(TPS("machine_suspend"),
state, false);
events_check_enabled = false;
} else if (*wakeup) {
error = -EBUSY;
}
syscore_resume();
}
arch_suspend_enable_irqs();
BUG_ON(irqs_disabled());
Enable_cpus:
enable_nonboot_cpus();
Platform_wake:
platform_resume_noirq(state);
dpm_resume_noirq(PMSG_RESUME);
Platform_early_resume:
platform_resume_early(state);
Devices_early_resume:
dpm_resume_early(PMSG_RESUME);
Platform_finish:
platform_resume_finish(state);
return error;
}
/**
* suspend_devices_and_enter - Suspend devices and enter system sleep state.
* @state: System sleep state to enter.
*/
int suspend_devices_and_enter(suspend_state_t state)
{
int error;
bool wakeup = false;
if (!sleep_state_supported(state))
return -ENOSYS;
error = platform_suspend_begin(state);
if (error)
goto Close;
suspend_console();
suspend_test_start();
error = dpm_suspend_start(PMSG_SUSPEND);
if (error) {
pr_err("PM: Some devices failed to suspend, or early wake event detected\n");
goto Recover_platform;
}
suspend_test_finish("suspend devices");
if (suspend_test(TEST_DEVICES))
goto Recover_platform;
do {
error = suspend_enter(state, &wakeup);
} while (!error && !wakeup && platform_suspend_again(state));
Resume_devices:
suspend_test_start();
dpm_resume_end(PMSG_RESUME);
suspend_test_finish("resume devices");
trace_suspend_resume(TPS("resume_console"), state, true);
resume_console();
trace_suspend_resume(TPS("resume_console"), state, false);
Close:
platform_resume_end(state);
return error;
Recover_platform:
platform_recover(state);
goto Resume_devices;
}
/**
* suspend_finish - Clean up before finishing the suspend sequence.
*
* Call platform code to clean up, restart processes, and free the console that
* we've allocated. This routine is not called for hibernation.
*/
static void suspend_finish(void)
{
suspend_thaw_processes();
pm_notifier_call_chain(PM_POST_SUSPEND);
pm_restore_console();
}
/**
* enter_state - Do common work needed to enter system sleep state.
* @state: System sleep state to enter.
*
* Make sure that no one else is trying to put the system into a sleep state.
* Fail if that's not the case. Otherwise, prepare for system suspend, make the
* system enter the given sleep state and clean up after wakeup.
*/
static int enter_state(suspend_state_t state)
{
int error;
trace_suspend_resume(TPS("suspend_enter"), state, true);
if (state == PM_SUSPEND_FREEZE) {
#ifdef CONFIG_PM_DEBUG
if (pm_test_level != TEST_NONE && pm_test_level <= TEST_CPUS) {
pr_warning("PM: Unsupported test mode for suspend to idle,"
"please choose none/freezer/devices/platform.\n");
return -EAGAIN;
}
#endif
} else if (!valid_state(state)) {
return -EINVAL;
}
if (!mutex_trylock(&pm_mutex))
return -EBUSY;
if (state == PM_SUSPEND_FREEZE)
freeze_begin();
#ifndef CONFIG_SUSPEND_SKIP_SYNC
trace_suspend_resume(TPS("sync_filesystems"), 0, true);
printk(KERN_INFO "PM: Syncing filesystems ... ");
sys_sync();
printk("done.\n");
trace_suspend_resume(TPS("sync_filesystems"), 0, false);
#endif
pr_debug("PM: Preparing system for sleep (%s)\n", pm_states[state]);
error = suspend_prepare(state);
if (error)
goto Unlock;
if (suspend_test(TEST_FREEZER))
goto Finish;
trace_suspend_resume(TPS("suspend_enter"), state, false);
pr_debug("PM: Suspending system (%s)\n", pm_states[state]);
pm_restrict_gfp_mask();
error = suspend_devices_and_enter(state);
pm_restore_gfp_mask();
Finish:
pr_debug("PM: Finishing wakeup.\n");
suspend_finish();
Unlock:
mutex_unlock(&pm_mutex);
return error;
}
/**
* pm_suspend - Externally visible function for suspending the system.
* @state: System sleep state to enter.
*
* Check if the value of @state represents one of the supported states,
* execute enter_state() and update system suspend statistics.
*/
int pm_suspend(suspend_state_t state)
{
int error;
if (state <= PM_SUSPEND_ON || state >= PM_SUSPEND_MAX)
return -EINVAL;
error = enter_state(state);
if (error) {
suspend_stats.fail++;
dpm_save_failed_errno(error);
} else {
suspend_stats.success++;
}
return error;
}
EXPORT_SYMBOL(pm_suspend);