linux_dsm_epyc7002/drivers/cpufreq/cpufreq_userspace.c
Venki Pallipadi 9e76988e93 [CPUFREQ] Eliminate cpufreq_userspace scaling_setspeed deadlock
Eliminate cpufreq_userspace scaling_setspeed deadlock.

Luming Yu recently uncovered yet another cpufreq related deadlock.
One thread that continuously switches the governors and the other thread that
repeatedly cats the contents of cpufreq directory causes both these threads to
go into a deadlock.

Detailed examination of the deadlock showed the exact flow before the deadlock
as:

Thread 1			Thread 2
________			________
				cats files under /sys/devices/.../cpufreq/
Set governor to userspace
  Adds a new sysfs entry for
  scaling_setspeed
				cats files under /sys/devices/.../cpufreq/

Set governor to performance
  Holds cpufreq_rw_sem in write
  mode
  Sends a STOP notify to
  userspace governor
				cat /sys/devices/.../cpufreq/scaling_setspeed
				  Gets a handle on the above sysfs entry with
				  sysfs_get_active
				  Blocks while trying to get cpufreq_rw_sem
				  in read mode
  Remove a sysfs entry for
  scaling_setspeed
    Blocks on sysfs_deactivate
    while waiting for earlier
    get_active (on other thread)
    to drain

At this point both threads go into deadlock and any other thread that tries to
do anything with sysfs cpufreq will also block.

There seems to be no easy way to avoid this deadlock as long as
cpufreq_userspace adds/removes the sysfs entry under same kobject as cpufreq.
Below patch moves scaling_setspeed to cpufreq.c, keeping it always and calling
back the governor on read/write. This is the cleanest fix I could think of,
even though adding two callbacks in governor structure just for this seems
unnecessary.

Note that the change makes scaling_setspeed under /sys/.../cpufreq permanent
and returns <unsupported> when governor is not userspace.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Dave Jones <davej@redhat.com>
2008-02-06 22:57:58 -05:00

214 lines
5.7 KiB
C

/*
* linux/drivers/cpufreq/cpufreq_userspace.c
*
* Copyright (C) 2001 Russell King
* (C) 2002 - 2004 Dominik Brodowski <linux@brodo.de>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/interrupt.h>
#include <linux/cpufreq.h>
#include <linux/cpu.h>
#include <linux/types.h>
#include <linux/fs.h>
#include <linux/sysfs.h>
#include <linux/mutex.h>
#include <asm/uaccess.h>
/**
* A few values needed by the userspace governor
*/
static unsigned int cpu_max_freq[NR_CPUS];
static unsigned int cpu_min_freq[NR_CPUS];
static unsigned int cpu_cur_freq[NR_CPUS]; /* current CPU freq */
static unsigned int cpu_set_freq[NR_CPUS]; /* CPU freq desired by userspace */
static unsigned int cpu_is_managed[NR_CPUS];
static DEFINE_MUTEX (userspace_mutex);
static int cpus_using_userspace_governor;
#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_GOVERNOR, "userspace", msg)
/* keep track of frequency transitions */
static int
userspace_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
void *data)
{
struct cpufreq_freqs *freq = data;
if (!cpu_is_managed[freq->cpu])
return 0;
dprintk("saving cpu_cur_freq of cpu %u to be %u kHz\n",
freq->cpu, freq->new);
cpu_cur_freq[freq->cpu] = freq->new;
return 0;
}
static struct notifier_block userspace_cpufreq_notifier_block = {
.notifier_call = userspace_cpufreq_notifier
};
/**
* cpufreq_set - set the CPU frequency
* @policy: pointer to policy struct where freq is being set
* @freq: target frequency in kHz
*
* Sets the CPU frequency to freq.
*/
static int cpufreq_set(struct cpufreq_policy *policy, unsigned int freq)
{
int ret = -EINVAL;
dprintk("cpufreq_set for cpu %u, freq %u kHz\n", policy->cpu, freq);
mutex_lock(&userspace_mutex);
if (!cpu_is_managed[policy->cpu])
goto err;
cpu_set_freq[policy->cpu] = freq;
if (freq < cpu_min_freq[policy->cpu])
freq = cpu_min_freq[policy->cpu];
if (freq > cpu_max_freq[policy->cpu])
freq = cpu_max_freq[policy->cpu];
/*
* We're safe from concurrent calls to ->target() here
* as we hold the userspace_mutex lock. If we were calling
* cpufreq_driver_target, a deadlock situation might occur:
* A: cpufreq_set (lock userspace_mutex) -> cpufreq_driver_target(lock policy->lock)
* B: cpufreq_set_policy(lock policy->lock) -> __cpufreq_governor -> cpufreq_governor_userspace (lock userspace_mutex)
*/
ret = __cpufreq_driver_target(policy, freq, CPUFREQ_RELATION_L);
err:
mutex_unlock(&userspace_mutex);
return ret;
}
static ssize_t show_speed(struct cpufreq_policy *policy, char *buf)
{
return sprintf(buf, "%u\n", cpu_cur_freq[policy->cpu]);
}
static int cpufreq_governor_userspace(struct cpufreq_policy *policy,
unsigned int event)
{
unsigned int cpu = policy->cpu;
int rc = 0;
switch (event) {
case CPUFREQ_GOV_START:
if (!cpu_online(cpu))
return -EINVAL;
BUG_ON(!policy->cur);
mutex_lock(&userspace_mutex);
if (cpus_using_userspace_governor == 0) {
cpufreq_register_notifier(
&userspace_cpufreq_notifier_block,
CPUFREQ_TRANSITION_NOTIFIER);
}
cpus_using_userspace_governor++;
cpu_is_managed[cpu] = 1;
cpu_min_freq[cpu] = policy->min;
cpu_max_freq[cpu] = policy->max;
cpu_cur_freq[cpu] = policy->cur;
cpu_set_freq[cpu] = policy->cur;
dprintk("managing cpu %u started (%u - %u kHz, currently %u kHz)\n", cpu, cpu_min_freq[cpu], cpu_max_freq[cpu], cpu_cur_freq[cpu]);
mutex_unlock(&userspace_mutex);
break;
case CPUFREQ_GOV_STOP:
mutex_lock(&userspace_mutex);
cpus_using_userspace_governor--;
if (cpus_using_userspace_governor == 0) {
cpufreq_unregister_notifier(
&userspace_cpufreq_notifier_block,
CPUFREQ_TRANSITION_NOTIFIER);
}
cpu_is_managed[cpu] = 0;
cpu_min_freq[cpu] = 0;
cpu_max_freq[cpu] = 0;
cpu_set_freq[cpu] = 0;
dprintk("managing cpu %u stopped\n", cpu);
mutex_unlock(&userspace_mutex);
break;
case CPUFREQ_GOV_LIMITS:
mutex_lock(&userspace_mutex);
dprintk("limit event for cpu %u: %u - %u kHz,"
"currently %u kHz, last set to %u kHz\n",
cpu, policy->min, policy->max,
cpu_cur_freq[cpu], cpu_set_freq[cpu]);
if (policy->max < cpu_set_freq[cpu]) {
__cpufreq_driver_target(policy, policy->max,
CPUFREQ_RELATION_H);
}
else if (policy->min > cpu_set_freq[cpu]) {
__cpufreq_driver_target(policy, policy->min,
CPUFREQ_RELATION_L);
}
else {
__cpufreq_driver_target(policy, cpu_set_freq[cpu],
CPUFREQ_RELATION_L);
}
cpu_min_freq[cpu] = policy->min;
cpu_max_freq[cpu] = policy->max;
cpu_cur_freq[cpu] = policy->cur;
mutex_unlock(&userspace_mutex);
break;
}
return rc;
}
struct cpufreq_governor cpufreq_gov_userspace = {
.name = "userspace",
.governor = cpufreq_governor_userspace,
.store_setspeed = cpufreq_set,
.show_setspeed = show_speed,
.owner = THIS_MODULE,
};
EXPORT_SYMBOL(cpufreq_gov_userspace);
static int __init cpufreq_gov_userspace_init(void)
{
return cpufreq_register_governor(&cpufreq_gov_userspace);
}
static void __exit cpufreq_gov_userspace_exit(void)
{
cpufreq_unregister_governor(&cpufreq_gov_userspace);
}
MODULE_AUTHOR ("Dominik Brodowski <linux@brodo.de>, Russell King <rmk@arm.linux.org.uk>");
MODULE_DESCRIPTION ("CPUfreq policy governor 'userspace'");
MODULE_LICENSE ("GPL");
#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE
fs_initcall(cpufreq_gov_userspace_init);
#else
module_init(cpufreq_gov_userspace_init);
#endif
module_exit(cpufreq_gov_userspace_exit);