Merge branch 'cpuidle' into next

This commit is contained in:
Benjamin Herrenschmidt 2011-12-16 11:09:11 +11:00
commit e6f08d37e6
13 changed files with 411 additions and 89 deletions

View File

@ -87,6 +87,10 @@ config ARCH_HAS_ILOG2_U64
bool
default y if 64BIT
config ARCH_HAS_CPU_IDLE_WAIT
bool
default y
config GENERIC_HWEIGHT
bool
default y

View File

@ -382,6 +382,9 @@ static inline unsigned long get_clean_sp(struct pt_regs *regs, int is_32)
}
#endif
/*
 * Boot-time idle override. The variable starts out as IDLE_NO_OVERRIDE and
 * is set to IDLE_POWERSAVE_OFF by the "powersave=off" boot option handler;
 * the pSeries cpuidle driver declines to probe unless it is
 * IDLE_NO_OVERRIDE.
 */
extern unsigned long cpuidle_disable;
enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_POWERSAVE_OFF};
#endif /* __KERNEL__ */
#endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_PROCESSOR_H */

View File

@ -221,6 +221,15 @@ extern unsigned long klimit;
extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask);
extern int powersave_nap; /* set if nap mode can be used in idle loop */
/* Sends a reschedule request to every other online CPU. */
void cpu_idle_wait(void);
/*
 * Hooks into the pSeries cpuidle driver. When CONFIG_PSERIES_IDLE is not
 * defined, callers get the empty stubs below (the add-cpu stub returns 0).
 *
 * NOTE(review): for a modular (=m) build only CONFIG_PSERIES_IDLE_MODULE is
 * defined, so this #ifdef would select the stubs while the driver source
 * defines non-static functions of the same names -- confirm the Kconfig
 * symbol can never be 'm'.
 */
#ifdef CONFIG_PSERIES_IDLE
extern void update_smt_snooze_delay(int snooze);
extern int pseries_notify_cpuidle_add_cpu(int cpu);
#else
static inline void update_smt_snooze_delay(int snooze) {}
static inline int pseries_notify_cpuidle_add_cpu(int cpu) { return 0; }
#endif
/*
* Atomic exchange

View File

@ -39,9 +39,13 @@
#define cpu_should_die() 0
#endif
unsigned long cpuidle_disable = IDLE_NO_OVERRIDE;
EXPORT_SYMBOL(cpuidle_disable);
/*
 * powersave_off - handler for the "powersave=off" boot option.
 * @arg: remainder of the option string (unused).
 *
 * Clears the platform power_save hook and records the override in
 * cpuidle_disable.
 *
 * Returns 1 so the __setup() machinery treats the option as consumed.
 * Returning 0 would mark "powersave=off" as unhandled, and the kernel would
 * then pass it on to init as an unknown boot argument.
 */
static int __init powersave_off(char *arg)
{
	ppc_md.power_save = NULL;
	cpuidle_disable = IDLE_POWERSAVE_OFF;
	return 1;
}
__setup("powersave=off", powersave_off);
@ -102,6 +106,29 @@ void cpu_idle(void)
}
}
/*
 * cpu_idle_wait - force the other CPUs out of the idle routine they are in.
 *
 * Needed on SMP systems when the idle handler is being replaced: the caller
 * installs the new handler first, then calls this so that every CPU leaves
 * the old idle loop and picks up the new one. The window may be larger on
 * shared systems.
 */
void cpu_idle_wait(void)
{
	int cpu;

	/* Full barrier before the reschedule requests are sent. */
	smp_mb();

	/* Hold the online-CPU set steady while kicking everyone but ourselves. */
	get_online_cpus();
	for_each_online_cpu(cpu) {
		if (cpu == smp_processor_id())
			continue;
		smp_send_reschedule(cpu);
	}
	put_online_cpus();
}
EXPORT_SYMBOL_GPL(cpu_idle_wait);
int powersave_nap;
#ifdef CONFIG_SYSCTL

View File

@ -18,6 +18,7 @@
#include <asm/machdep.h>
#include <asm/smp.h>
#include <asm/pmc.h>
#include <asm/system.h>
#include "cacheinfo.h"
@ -51,6 +52,7 @@ static ssize_t store_smt_snooze_delay(struct sys_device *dev,
return -EINVAL;
per_cpu(smt_snooze_delay, cpu->sysdev.id) = snooze;
update_smt_snooze_delay(snooze);
return count;
}

View File

@ -211,6 +211,12 @@ config PPC_PASEMI_CPUFREQ
endmenu
menu "CPUIdle driver"
source "drivers/cpuidle/Kconfig"
endmenu
config PPC601_SYNC_FIX
bool "Workarounds for PPC601 bugs"
depends on 6xx && (PPC_PREP || PPC_PMAC)

View File

@ -120,3 +120,12 @@ config DTL
which are accessible through a debugfs file.
Say N if you are unsure.
# Must be bool, not tristate: the hook declarations in <asm/system.h> are
# selected with "#ifdef CONFIG_PSERIES_IDLE", which is not defined for an =m
# build, and built-in platform code calls into the driver directly.
config PSERIES_IDLE
	bool "Cpuidle driver for pSeries platforms"
	depends on CPU_IDLE
	depends on PPC_PSERIES
	default y
	help
	  Select this option to enable processor idle state management
	  through cpuidle subsystem.

View File

@ -22,6 +22,7 @@ obj-$(CONFIG_PHYP_DUMP) += phyp_dump.o
obj-$(CONFIG_CMM) += cmm.o
obj-$(CONFIG_DTL) += dtl.o
obj-$(CONFIG_IO_EVENT_IRQ) += io_event_irq.o
obj-$(CONFIG_PSERIES_IDLE) += processor_idle.o
ifeq ($(CONFIG_PPC_PSERIES),y)
obj-$(CONFIG_SUSPEND) += suspend.o

View File

@ -0,0 +1,329 @@
/*
* processor_idle - idle state cpuidle driver.
* Adapted from drivers/idle/intel_idle.c and
* drivers/acpi/processor_idle.c
*
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/moduleparam.h>
#include <linux/cpuidle.h>
#include <linux/cpu.h>
#include <asm/paca.h>
#include <asm/reg.h>
#include <asm/system.h>
#include <asm/machdep.h>
#include <asm/firmware.h>
#include "plpar_wrappers.h"
#include "pseries.h"
/*
 * cpuidle driver descriptor. Only the name and owner are set statically;
 * states[] and state_count are filled in by pseries_cpuidle_driver_init().
 */
struct cpuidle_driver pseries_idle_driver = {
.name = "pseries_idle",
.owner = THIS_MODULE,
};
/* Number of slots in each of the state tables defined in this file. */
#define MAX_IDLE_STATE_COUNT 2
/* Highest state-table index pseries_cpuidle_driver_init() will look at. */
static int max_idle_state = MAX_IDLE_STATE_COUNT - 1;
/* Per-CPU cpuidle devices; allocated by pseries_idle_devices_init(). */
static struct cpuidle_device __percpu *pseries_cpuidle_devices;
/* Table picked by pseries_idle_probe(): shared_states or dedicated_states. */
static struct cpuidle_state *cpuidle_state_table;
/*
 * update_smt_snooze_delay - propagate a new snooze delay to the cpuidle driver.
 * @snooze: value stored as the target residency of the driver's state 0.
 *
 * Does nothing when cpuidle_get_driver() returns NULL.
 */
void update_smt_snooze_delay(int snooze)
{
	struct cpuidle_driver *drv = cpuidle_get_driver();

	if (!drv)
		return;

	drv->states[0].target_residency = snooze;
}
/*
 * idle_loop_prolog - common entry bookkeeping for every idle state.
 * @in_purr:   out; receives the PURR value sampled on entry.
 * @kt_before: out; receives the wall-clock time sampled on entry.
 *
 * Both samples are handed back to idle_loop_epilog() when the state exits.
 * Also sets the lppaca idle flag.
 */
static inline void idle_loop_prolog(unsigned long *in_purr, ktime_t *kt_before)
{
*kt_before = ktime_get_real();
*in_purr = mfspr(SPRN_PURR);
/*
* Indicate to the HV that we are idle. Now would be
* a good time to find other work to dispatch.
*/
get_lppaca()->idle = 1;
}
/*
 * idle_loop_epilog - common exit bookkeeping for every idle state.
 * @in_purr:   PURR value captured by idle_loop_prolog().
 * @kt_before: wall-clock time captured by idle_loop_prolog().
 *
 * Adds the PURR delta since entry to the lppaca wait_state_cycles counter,
 * clears the lppaca idle flag, and returns the wall-clock time elapsed since
 * @kt_before in microseconds.
 */
static inline s64 idle_loop_epilog(unsigned long in_purr, ktime_t kt_before)
{
	unsigned long purr_delta = mfspr(SPRN_PURR) - in_purr;
	ktime_t elapsed;

	get_lppaca()->wait_state_cycles += purr_delta;
	get_lppaca()->idle = 0;

	elapsed = ktime_sub(ktime_get_real(), kt_before);
	return ktime_to_us(elapsed);
}
/*
 * snooze_loop - "snooze" idle state: busy-poll at lowered thread priority.
 * @dev:   cpuidle device of the CPU going idle; last_residency is updated.
 * @drv:   cpuidle driver; the snooze delay is read from its state 0.
 * @index: index of the state being entered; returned unchanged.
 *
 * With a non-zero delay, polls with interrupts enabled and
 * TIF_POLLING_NRFLAG set until the timebase deadline passes, or until a
 * reschedule is needed or the CPU is offline. A zero delay skips the poll
 * entirely. The elapsed wall-clock time in microseconds is stored in
 * dev->last_residency.
 *
 * NOTE(review): the early "goto out" exits leave TIF_POLLING_NRFLAG set and
 * interrupts enabled, whereas the deadline exit clears the flag and disables
 * interrupts -- confirm the caller copes with both exit conditions.
 */
static int snooze_loop(struct cpuidle_device *dev,
struct cpuidle_driver *drv,
int index)
{
unsigned long in_purr;
ktime_t kt_before;
/* Despite the name, this is the timebase value at which polling ends. */
unsigned long start_snooze;
/*
 * The delay always comes from state 0, whatever @index is.
 * NOTE(review): if target_residency is an unsigned 32-bit field, this
 * widening can never yield a negative value -- confirm.
 */
long snooze = drv->states[0].target_residency;
idle_loop_prolog(&in_purr, &kt_before);
if (snooze) {
start_snooze = get_tb() + snooze * tb_ticks_per_usec;
local_irq_enable();
set_thread_flag(TIF_POLLING_NRFLAG);
/* A negative delay means poll with no deadline. */
while ((snooze < 0) || (get_tb() < start_snooze)) {
if (need_resched() || cpu_is_offline(dev->cpu))
goto out;
ppc64_runlatch_off();
HMT_low();
HMT_very_low();
}
/* Deadline reached: restore priority, stop polling, irqs back off. */
HMT_medium();
clear_thread_flag(TIF_POLLING_NRFLAG);
smp_mb();
local_irq_disable();
}
out:
/* Reached from every path, so priority is always restored here. */
HMT_medium();
dev->last_residency =
(int)idle_loop_epilog(in_purr, kt_before);
return index;
}
/*
 * dedicated_cede_loop - "CEDE" idle state of the dedicated-partition table.
 * @dev:   cpuidle device of the CPU going idle; last_residency is updated.
 * @drv:   cpuidle driver (unused here).
 * @index: index of the state being entered; returned unchanged.
 *
 * Sets the lppaca donate_dedicated_cpu flag around the cede_processor()
 * call and clears it afterwards. The elapsed wall-clock time in
 * microseconds is stored in dev->last_residency.
 */
static int dedicated_cede_loop(struct cpuidle_device *dev,
struct cpuidle_driver *drv,
int index)
{
unsigned long in_purr;
ktime_t kt_before;
idle_loop_prolog(&in_purr, &kt_before);
get_lppaca()->donate_dedicated_cpu = 1;
ppc64_runlatch_off();
HMT_medium();
cede_processor();
get_lppaca()->donate_dedicated_cpu = 0;
dev->last_residency =
(int)idle_loop_epilog(in_purr, kt_before);
return index;
}
/*
 * shared_cede_loop - the only idle state of the shared-partition table.
 * @dev:   cpuidle device of the CPU going idle; last_residency is updated.
 * @drv:   cpuidle driver (unused here).
 * @index: index of the state being entered; returned unchanged.
 *
 * Cedes the processor between the common prolog/epilog bookkeeping and
 * stores the elapsed wall-clock time in microseconds in dev->last_residency.
 */
static int shared_cede_loop(struct cpuidle_device *dev,
struct cpuidle_driver *drv,
int index)
{
unsigned long in_purr;
ktime_t kt_before;
idle_loop_prolog(&in_purr, &kt_before);
/*
* Yield the processor to the hypervisor. We return if
* an external interrupt occurs (which are driven prior
* to returning here) or if a prod occurs from another
* processor. When returning here, external interrupts
* are enabled.
*/
cede_processor();
dev->last_residency =
(int)idle_loop_epilog(in_purr, kt_before);
return index;
}
/*
* States for dedicated partition case.
*
* Template entries only: pseries_cpuidle_driver_init() copies them into the
* driver and, for this table, overwrites target_residency of every copied
* entry with the per-cpu smt_snooze_delay value.
*/
static struct cpuidle_state dedicated_states[MAX_IDLE_STATE_COUNT] = {
{ /* Snooze */
.name = "snooze",
.desc = "snooze",
.flags = CPUIDLE_FLAG_TIME_VALID,
.exit_latency = 0,
.target_residency = 0,
.enter = &snooze_loop },
{ /* CEDE */
.name = "CEDE",
.desc = "CEDE",
.flags = CPUIDLE_FLAG_TIME_VALID,
.exit_latency = 1,
.target_residency = 10,
.enter = &dedicated_cede_loop },
};
/*
* States for shared partition case.
*
* Only the first of the MAX_IDLE_STATE_COUNT slots is initialized. The
* remaining slot is zero-filled, so its enter hook is NULL and
* pseries_cpuidle_driver_init() skips it.
*/
static struct cpuidle_state shared_states[MAX_IDLE_STATE_COUNT] = {
{ /* Shared Cede */
.name = "Shared Cede",
.desc = "Shared Cede",
.flags = CPUIDLE_FLAG_TIME_VALID,
.exit_latency = 0,
.target_residency = 0,
.enter = &shared_cede_loop },
};
/*
 * pseries_notify_cpuidle_add_cpu - re-arm the cpuidle device of a CPU.
 * @cpu: CPU whose cpuidle device is disabled and then enabled again.
 *
 * Does nothing unless the per-cpu device array has been allocated and a
 * cpuidle driver is registered. The array pointer itself is tested rather
 * than the derived per-cpu pointer: per_cpu_ptr() applied to a NULL base
 * adds the CPU's per-cpu offset, so its result cannot be relied on to be
 * NULL when nothing was allocated.
 *
 * Always returns 0.
 */
int pseries_notify_cpuidle_add_cpu(int cpu)
{
	struct cpuidle_device *dev;

	if (!pseries_cpuidle_devices || !cpuidle_get_driver())
		return 0;

	dev = per_cpu_ptr(pseries_cpuidle_devices, cpu);
	cpuidle_disable_device(dev);
	cpuidle_enable_device(dev);

	return 0;
}
/*
* pseries_cpuidle_driver_init()
*/
static int pseries_cpuidle_driver_init(void)
{
int idle_state;
struct cpuidle_driver *drv = &pseries_idle_driver;
drv->state_count = 0;
for (idle_state = 0; idle_state < MAX_IDLE_STATE_COUNT; ++idle_state) {
if (idle_state > max_idle_state)
break;
/* is the state not enabled? */
if (cpuidle_state_table[idle_state].enter == NULL)
continue;
drv->states[drv->state_count] = /* structure copy */
cpuidle_state_table[idle_state];
if (cpuidle_state_table == dedicated_states)
drv->states[drv->state_count].target_residency =
__get_cpu_var(smt_snooze_delay);
drv->state_count += 1;
}
return 0;
}
/* pseries_idle_devices_uninit(void)
* unregister cpuidle devices and de-allocate memory
*/
static void pseries_idle_devices_uninit(void)
{
int i;
struct cpuidle_device *dev;
for_each_possible_cpu(i) {
dev = per_cpu_ptr(pseries_cpuidle_devices, i);
cpuidle_unregister_device(dev);
}
free_percpu(pseries_cpuidle_devices);
return;
}
/*
 * pseries_idle_devices_init - allocate, set up and register cpuidle devices.
 *
 * Allocates one cpuidle device per possible CPU, gives each the driver's
 * state count and its CPU number, and registers it.
 *
 * Returns 0 on success, -ENOMEM if the per-cpu allocation fails, or -EIO as
 * soon as one registration fails. Nothing is undone here on failure; that is
 * left to the caller.
 */
static int pseries_idle_devices_init(void)
{
	int cpu;

	pseries_cpuidle_devices = alloc_percpu(struct cpuidle_device);
	if (!pseries_cpuidle_devices)
		return -ENOMEM;

	for_each_possible_cpu(cpu) {
		struct cpuidle_device *dev =
			per_cpu_ptr(pseries_cpuidle_devices, cpu);

		dev->state_count = pseries_idle_driver.state_count;
		dev->cpu = cpu;

		if (cpuidle_register_device(dev) != 0) {
			printk(KERN_DEBUG
			       "cpuidle_register_device %d failed!\n", cpu);
			return -EIO;
		}
	}

	return 0;
}
/*
 * pseries_idle_probe - decide whether the driver applies and pick a table.
 *
 * Returns -ENODEV when the SPLPAR firmware feature is absent or when a boot
 * option has overridden idle handling, and -EPERM when max_idle_state is 0.
 * Otherwise points cpuidle_state_table at the shared or dedicated state
 * table, depending on the lppaca shared_proc flag, and returns 0.
 */
static int pseries_idle_probe(void)
{
	if (!firmware_has_feature(FW_FEATURE_SPLPAR) ||
	    cpuidle_disable != IDLE_NO_OVERRIDE)
		return -ENODEV;

	if (!max_idle_state) {
		printk(KERN_DEBUG "pseries processor idle disabled.\n");
		return -EPERM;
	}

	cpuidle_state_table = get_lppaca()->shared_proc ?
				shared_states : dedicated_states;

	return 0;
}
/*
 * pseries_processor_idle_init - module entry point.
 *
 * Probes the platform, builds the driver's state list, registers the driver
 * and then the per-cpu devices. If device setup fails, the devices are torn
 * down and the driver is unregistered again.
 *
 * Returns 0 on success or the error code of the step that failed.
 */
static int __init pseries_processor_idle_init(void)
{
	int err = pseries_idle_probe();

	if (err)
		return err;

	pseries_cpuidle_driver_init();

	err = cpuidle_register_driver(&pseries_idle_driver);
	if (err) {
		printk(KERN_DEBUG "Registration of pseries driver failed.\n");
		return err;
	}

	err = pseries_idle_devices_init();
	if (err)
		goto undo_driver;

	printk(KERN_DEBUG "pseries_idle_driver registered\n");
	return 0;

undo_driver:
	pseries_idle_devices_uninit();
	cpuidle_unregister_driver(&pseries_idle_driver);
	return err;
}
/*
 * pseries_processor_idle_exit - module exit point.
 *
 * Tears down the per-cpu cpuidle devices first, then unregisters the driver.
 */
static void __exit pseries_processor_idle_exit(void)
{
	pseries_idle_devices_uninit();
	cpuidle_unregister_driver(&pseries_idle_driver);
}
module_init(pseries_processor_idle_init);
module_exit(pseries_processor_idle_exit);
MODULE_AUTHOR("Deepthi Dharwar <deepthi@linux.vnet.ibm.com>");
MODULE_DESCRIPTION("Cpuidle driver for POWER");
MODULE_LICENSE("GPL");

View File

@ -57,4 +57,7 @@ extern struct device_node *dlpar_configure_connector(u32);
extern int dlpar_attach_node(struct device_node *);
extern int dlpar_detach_node(struct device_node *);
/* Snooze Delay, pseries_idle */
DECLARE_PER_CPU(long, smt_snooze_delay);
#endif /* _PSERIES_PSERIES_H */

View File

@ -39,6 +39,7 @@
#include <linux/irq.h>
#include <linux/seq_file.h>
#include <linux/root_dev.h>
#include <linux/cpuidle.h>
#include <asm/mmu.h>
#include <asm/processor.h>
@ -74,9 +75,6 @@ EXPORT_SYMBOL(CMO_PageSize);
int fwnmi_active; /* TRUE if an FWNMI handler is present */
static void pseries_shared_idle_sleep(void);
static void pseries_dedicated_idle_sleep(void);
static struct device_node *pSeries_mpic_node;
static void pSeries_show_cpuinfo(struct seq_file *m)
@ -351,6 +349,21 @@ static int alloc_dispatch_log_kmem_cache(void)
}
early_initcall(alloc_dispatch_log_kmem_cache);
/*
 * pSeries_idle - platform power_save hook.
 *
 * Hands the idle decision to the cpuidle framework and its back-end pseries
 * driver. Only when that call reports an error does the default handler run,
 * which drops the thread to low and then very low priority.
 */
static void pSeries_idle(void)
{
	if (!cpuidle_idle_call())
		return;

	/* cpuidle could not take the CPU idle: fall back to low priority. */
	HMT_low();
	HMT_very_low();
}
static void __init pSeries_setup_arch(void)
{
/* Discover PIC type and setup ppc_md accordingly */
@ -373,18 +386,9 @@ static void __init pSeries_setup_arch(void)
pSeries_nvram_init();
/* Choose an idle loop */
if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
vpa_init(boot_cpuid);
if (get_lppaca()->shared_proc) {
printk(KERN_DEBUG "Using shared processor idle loop\n");
ppc_md.power_save = pseries_shared_idle_sleep;
} else {
printk(KERN_DEBUG "Using dedicated idle loop\n");
ppc_md.power_save = pseries_dedicated_idle_sleep;
}
} else {
printk(KERN_DEBUG "Using default idle loop\n");
ppc_md.power_save = pSeries_idle;
}
if (firmware_has_feature(FW_FEATURE_LPAR))
@ -585,80 +589,6 @@ static int __init pSeries_probe(void)
return 1;
}
DECLARE_PER_CPU(long, smt_snooze_delay);
static void pseries_dedicated_idle_sleep(void)
{
unsigned int cpu = smp_processor_id();
unsigned long start_snooze;
unsigned long in_purr, out_purr;
long snooze = __get_cpu_var(smt_snooze_delay);
/*
* Indicate to the HV that we are idle. Now would be
* a good time to find other work to dispatch.
*/
get_lppaca()->idle = 1;
get_lppaca()->donate_dedicated_cpu = 1;
in_purr = mfspr(SPRN_PURR);
/*
* We come in with interrupts disabled, and need_resched()
* has been checked recently. If we should poll for a little
* while, do so.
*/
if (snooze) {
start_snooze = get_tb() + snooze * tb_ticks_per_usec;
local_irq_enable();
set_thread_flag(TIF_POLLING_NRFLAG);
while ((snooze < 0) || (get_tb() < start_snooze)) {
if (need_resched() || cpu_is_offline(cpu))
goto out;
ppc64_runlatch_off();
HMT_low();
HMT_very_low();
}
HMT_medium();
clear_thread_flag(TIF_POLLING_NRFLAG);
smp_mb();
local_irq_disable();
if (need_resched() || cpu_is_offline(cpu))
goto out;
}
cede_processor();
out:
HMT_medium();
out_purr = mfspr(SPRN_PURR);
get_lppaca()->wait_state_cycles += out_purr - in_purr;
get_lppaca()->donate_dedicated_cpu = 0;
get_lppaca()->idle = 0;
}
static void pseries_shared_idle_sleep(void)
{
/*
* Indicate to the HV that we are idle. Now would be
* a good time to find other work to dispatch.
*/
get_lppaca()->idle = 1;
/*
* Yield the processor to the hypervisor. We return if
* an external interrupt occurs (which are driven prior
* to returning here) or if a prod occurs from another
* processor. When returning here, external interrupts
* are enabled.
*/
cede_processor();
get_lppaca()->idle = 0;
}
static int pSeries_pci_probe_mode(struct pci_bus *bus)
{
if (firmware_has_feature(FW_FEATURE_LPAR))

View File

@ -148,6 +148,7 @@ static void __devinit smp_xics_setup_cpu(int cpu)
set_cpu_current_state(cpu, CPU_STATE_ONLINE);
set_default_offline_state(cpu);
#endif
pseries_notify_cpuidle_add_cpu(cpu);
}
static int __devinit smp_pSeries_kick_cpu(int nr)

View File

@ -130,7 +130,6 @@ struct cpuidle_driver {
#ifdef CONFIG_CPU_IDLE
extern void disable_cpuidle(void);
extern int cpuidle_idle_call(void);
extern int cpuidle_register_driver(struct cpuidle_driver *drv);
struct cpuidle_driver *cpuidle_get_driver(void);
extern void cpuidle_unregister_driver(struct cpuidle_driver *drv);
@ -145,7 +144,6 @@ extern void cpuidle_disable_device(struct cpuidle_device *dev);
#else
static inline void disable_cpuidle(void) { }
static inline int cpuidle_idle_call(void) { return -ENODEV; }
static inline int cpuidle_register_driver(struct cpuidle_driver *drv)
{return -ENODEV; }
static inline struct cpuidle_driver *cpuidle_get_driver(void) {return NULL; }