linux_dsm_epyc7002/drivers/cpufreq/cpufreq_conservative.c

379 lines
9.8 KiB
C
Raw Normal View History

/*
* drivers/cpufreq/cpufreq_conservative.c
*
* Copyright (C) 2001 Russell King
* (C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
* Jun Nakajima <jun.nakajima@intel.com>
* (C) 2009 Alexander Clouter <alex@digriz.org.uk>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/slab.h>
#include "cpufreq_governor.h"
struct cs_policy_dbs_info {
struct policy_dbs_info policy_dbs;
unsigned int down_skip;
unsigned int requested_freq;
};
static inline struct cs_policy_dbs_info *to_dbs_info(struct policy_dbs_info *policy_dbs)
{
return container_of(policy_dbs, struct cs_policy_dbs_info, policy_dbs);
}
struct cs_dbs_tuners {
unsigned int down_threshold;
unsigned int freq_step;
};
/* Conservative governor macros */
#define DEF_FREQUENCY_UP_THRESHOLD (80)
#define DEF_FREQUENCY_DOWN_THRESHOLD (20)
#define DEF_FREQUENCY_STEP (5)
#define DEF_SAMPLING_DOWN_FACTOR (1)
#define MAX_SAMPLING_DOWN_FACTOR (10)
static inline unsigned int get_freq_target(struct cs_dbs_tuners *cs_tuners,
struct cpufreq_policy *policy)
{
unsigned int freq_target = (cs_tuners->freq_step * policy->max) / 100;
/* max freq cannot be less than 100. But who knows... */
if (unlikely(freq_target == 0))
freq_target = DEF_FREQUENCY_STEP;
return freq_target;
}
/*
* Every sampling_rate, we check, if current idle time is less than 20%
* (default), then we try to increase frequency. Every sampling_rate *
* sampling_down_factor, we check, if current idle time is more than 80%
* (default), then we try to decrease frequency
*
* Any frequency increase takes it to the maximum frequency. Frequency reduction
* happens at minimum steps of 5% (default) of maximum frequency
*/
static unsigned int cs_dbs_timer(struct cpufreq_policy *policy)
{
struct policy_dbs_info *policy_dbs = policy->governor_data;
struct cs_policy_dbs_info *dbs_info = to_dbs_info(policy_dbs);
struct dbs_data *dbs_data = policy_dbs->dbs_data;
struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
unsigned int load = dbs_update(policy);
/*
* break out if we 'cannot' reduce the speed as the user might
* want freq_step to be zero
*/
if (cs_tuners->freq_step == 0)
goto out;
/* Check for frequency increase */
if (load > dbs_data->up_threshold) {
dbs_info->down_skip = 0;
/* if we are already at full speed then break out early */
if (dbs_info->requested_freq == policy->max)
goto out;
dbs_info->requested_freq += get_freq_target(cs_tuners, policy);
if (dbs_info->requested_freq > policy->max)
dbs_info->requested_freq = policy->max;
__cpufreq_driver_target(policy, dbs_info->requested_freq,
CPUFREQ_RELATION_H);
goto out;
}
/* if sampling_down_factor is active break out early */
if (++dbs_info->down_skip < dbs_data->sampling_down_factor)
goto out;
dbs_info->down_skip = 0;
/* Check for frequency decrease */
if (load < cs_tuners->down_threshold) {
unsigned int freq_target;
/*
* if we cannot reduce the frequency anymore, break out early
*/
if (policy->cur == policy->min)
goto out;
freq_target = get_freq_target(cs_tuners, policy);
if (dbs_info->requested_freq > freq_target)
dbs_info->requested_freq -= freq_target;
else
dbs_info->requested_freq = policy->min;
__cpufreq_driver_target(policy, dbs_info->requested_freq,
CPUFREQ_RELATION_L);
}
out:
return dbs_data->sampling_rate;
}
static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
void *data);
static struct notifier_block cs_cpufreq_notifier_block = {
.notifier_call = dbs_cpufreq_notifier,
};
/************************** sysfs interface ************************/
static struct dbs_governor cs_dbs_gov;
static ssize_t store_sampling_down_factor(struct gov_attr_set *attr_set,
const char *buf, size_t count)
{
struct dbs_data *dbs_data = to_dbs_data(attr_set);
unsigned int input;
int ret;
ret = sscanf(buf, "%u", &input);
if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1)
return -EINVAL;
dbs_data->sampling_down_factor = input;
return count;
}
static ssize_t store_up_threshold(struct gov_attr_set *attr_set,
const char *buf, size_t count)
{
struct dbs_data *dbs_data = to_dbs_data(attr_set);
struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
unsigned int input;
int ret;
ret = sscanf(buf, "%u", &input);
if (ret != 1 || input > 100 || input <= cs_tuners->down_threshold)
return -EINVAL;
dbs_data->up_threshold = input;
return count;
}
static ssize_t store_down_threshold(struct gov_attr_set *attr_set,
const char *buf, size_t count)
{
struct dbs_data *dbs_data = to_dbs_data(attr_set);
struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
unsigned int input;
int ret;
ret = sscanf(buf, "%u", &input);
/* cannot be lower than 11 otherwise freq will not fall */
if (ret != 1 || input < 11 || input > 100 ||
input >= dbs_data->up_threshold)
return -EINVAL;
cs_tuners->down_threshold = input;
return count;
}
static ssize_t store_ignore_nice_load(struct gov_attr_set *attr_set,
const char *buf, size_t count)
{
struct dbs_data *dbs_data = to_dbs_data(attr_set);
unsigned int input;
int ret;
ret = sscanf(buf, "%u", &input);
if (ret != 1)
return -EINVAL;
if (input > 1)
input = 1;
if (input == dbs_data->ignore_nice_load) /* nothing to do */
return count;
dbs_data->ignore_nice_load = input;
/* we need to re-evaluate prev_cpu_idle */
gov_update_cpu_data(dbs_data);
return count;
}
static ssize_t store_freq_step(struct gov_attr_set *attr_set, const char *buf,
size_t count)
{
struct dbs_data *dbs_data = to_dbs_data(attr_set);
struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
unsigned int input;
int ret;
ret = sscanf(buf, "%u", &input);
if (ret != 1)
return -EINVAL;
if (input > 100)
input = 100;
/*
* no need to test here if freq_step is zero as the user might actually
* want this, they would be crazy though :)
*/
cs_tuners->freq_step = input;
return count;
}
cpufreq: governor: New sysfs show/store callbacks for governor tunables The ondemand and conservative governors use the global-attr or freq-attr structures to represent sysfs attributes corresponding to their tunables (which of them is actually used depends on whether or not different policy objects can use the same governor with different tunables at the same time and, consequently, on where those attributes are located in sysfs). Unfortunately, in the freq-attr case, the standard cpufreq show/store sysfs attribute callbacks are applied to the governor tunable attributes and they always acquire the policy->rwsem lock before carrying out the operation. That may lead to an ABBA deadlock if governor tunable attributes are removed under policy->rwsem while one of them is being accessed concurrently (if sysfs attributes removal wins the race, it will wait for the access to complete with policy->rwsem held while the attribute callback will block on policy->rwsem indefinitely). We attempted to address this issue by dropping policy->rwsem around governor tunable attributes removal (that is, around invocations of the ->governor callback with the event arg equal to CPUFREQ_GOV_POLICY_EXIT) in cpufreq_set_policy(), but that opened up race conditions that had not been possible with policy->rwsem held all the time. Therefore policy->rwsem cannot be dropped in cpufreq_set_policy() at any point, but the deadlock situation described above must be avoided too. To that end, use the observation that in principle governor tunables may be represented by the same data type regardless of whether the governor is system-wide or per-policy and introduce a new structure, struct governor_attr, for representing them and new corresponding macros for creating show/store sysfs callbacks for them. Also make their parent kobject use a new kobject type whose default show/store callbacks are not related to the standard core cpufreq ones in any way (and they don't acquire policy->rwsem in particular). Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org> Tested-by: Juri Lelli <juri.lelli@arm.com> Tested-by: Shilpasri G Bhat <shilpa.bhat@linux.vnet.ibm.com> [ rjw: Subject & changelog + rebase ] Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2016-02-09 10:31:33 +07:00
gov_show_one_common(sampling_rate);
gov_show_one_common(sampling_down_factor);
gov_show_one_common(up_threshold);
gov_show_one_common(ignore_nice_load);
gov_show_one_common(min_sampling_rate);
gov_show_one(cs, down_threshold);
gov_show_one(cs, freq_step);
gov_attr_rw(sampling_rate);
gov_attr_rw(sampling_down_factor);
gov_attr_rw(up_threshold);
gov_attr_rw(ignore_nice_load);
gov_attr_ro(min_sampling_rate);
gov_attr_rw(down_threshold);
gov_attr_rw(freq_step);
static struct attribute *cs_attributes[] = {
&min_sampling_rate.attr,
&sampling_rate.attr,
&sampling_down_factor.attr,
&up_threshold.attr,
&down_threshold.attr,
&ignore_nice_load.attr,
&freq_step.attr,
NULL
};
/************************** sysfs end ************************/
static struct policy_dbs_info *cs_alloc(void)
{
struct cs_policy_dbs_info *dbs_info;
dbs_info = kzalloc(sizeof(*dbs_info), GFP_KERNEL);
return dbs_info ? &dbs_info->policy_dbs : NULL;
}
static void cs_free(struct policy_dbs_info *policy_dbs)
{
kfree(to_dbs_info(policy_dbs));
}
static int cs_init(struct dbs_data *dbs_data, bool notify)
{
struct cs_dbs_tuners *tuners;
tuners = kzalloc(sizeof(*tuners), GFP_KERNEL);
if (!tuners)
return -ENOMEM;
tuners->down_threshold = DEF_FREQUENCY_DOWN_THRESHOLD;
tuners->freq_step = DEF_FREQUENCY_STEP;
dbs_data->up_threshold = DEF_FREQUENCY_UP_THRESHOLD;
dbs_data->sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR;
dbs_data->ignore_nice_load = 0;
dbs_data->tuners = tuners;
dbs_data->min_sampling_rate = MIN_SAMPLING_RATE_RATIO *
jiffies_to_usecs(10);
if (notify)
cpufreq_register_notifier(&cs_cpufreq_notifier_block,
CPUFREQ_TRANSITION_NOTIFIER);
return 0;
}
static void cs_exit(struct dbs_data *dbs_data, bool notify)
{
if (notify)
cpufreq_unregister_notifier(&cs_cpufreq_notifier_block,
CPUFREQ_TRANSITION_NOTIFIER);
kfree(dbs_data->tuners);
}
static void cs_start(struct cpufreq_policy *policy)
{
struct cs_policy_dbs_info *dbs_info = to_dbs_info(policy->governor_data);
dbs_info->down_skip = 0;
dbs_info->requested_freq = policy->cur;
}
static struct dbs_governor cs_dbs_gov = {
.gov = CPUFREQ_DBS_GOVERNOR_INITIALIZER("conservative"),
cpufreq: governor: New sysfs show/store callbacks for governor tunables The ondemand and conservative governors use the global-attr or freq-attr structures to represent sysfs attributes corresponding to their tunables (which of them is actually used depends on whether or not different policy objects can use the same governor with different tunables at the same time and, consequently, on where those attributes are located in sysfs). Unfortunately, in the freq-attr case, the standard cpufreq show/store sysfs attribute callbacks are applied to the governor tunable attributes and they always acquire the policy->rwsem lock before carrying out the operation. That may lead to an ABBA deadlock if governor tunable attributes are removed under policy->rwsem while one of them is being accessed concurrently (if sysfs attributes removal wins the race, it will wait for the access to complete with policy->rwsem held while the attribute callback will block on policy->rwsem indefinitely). We attempted to address this issue by dropping policy->rwsem around governor tunable attributes removal (that is, around invocations of the ->governor callback with the event arg equal to CPUFREQ_GOV_POLICY_EXIT) in cpufreq_set_policy(), but that opened up race conditions that had not been possible with policy->rwsem held all the time. Therefore policy->rwsem cannot be dropped in cpufreq_set_policy() at any point, but the deadlock situation described above must be avoided too. To that end, use the observation that in principle governor tunables may be represented by the same data type regardless of whether the governor is system-wide or per-policy and introduce a new structure, struct governor_attr, for representing them and new corresponding macros for creating show/store sysfs callbacks for them. Also make their parent kobject use a new kobject type whose default show/store callbacks are not related to the standard core cpufreq ones in any way (and they don't acquire policy->rwsem in particular). Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org> Tested-by: Juri Lelli <juri.lelli@arm.com> Tested-by: Shilpasri G Bhat <shilpa.bhat@linux.vnet.ibm.com> [ rjw: Subject & changelog + rebase ] Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2016-02-09 10:31:33 +07:00
.kobj_type = { .default_attrs = cs_attributes },
.gov_dbs_timer = cs_dbs_timer,
.alloc = cs_alloc,
.free = cs_free,
.init = cs_init,
.exit = cs_exit,
.start = cs_start,
};
#define CPU_FREQ_GOV_CONSERVATIVE (&cs_dbs_gov.gov)
static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
void *data)
{
struct cpufreq_freqs *freq = data;
struct cpufreq_policy *policy = cpufreq_cpu_get_raw(freq->cpu);
struct cs_policy_dbs_info *dbs_info;
if (!policy)
return 0;
/* policy isn't governed by conservative governor */
if (policy->governor != CPU_FREQ_GOV_CONSERVATIVE)
return 0;
dbs_info = to_dbs_info(policy->governor_data);
/*
* we only care if our internally tracked freq moves outside the 'valid'
* ranges of frequency available to us otherwise we do not change it
*/
if (dbs_info->requested_freq > policy->max
|| dbs_info->requested_freq < policy->min)
dbs_info->requested_freq = freq->new;
return 0;
}
static int __init cpufreq_gov_dbs_init(void)
{
return cpufreq_register_governor(CPU_FREQ_GOV_CONSERVATIVE);
}
static void __exit cpufreq_gov_dbs_exit(void)
{
cpufreq_unregister_governor(CPU_FREQ_GOV_CONSERVATIVE);
}
MODULE_AUTHOR("Alexander Clouter <alex@digriz.org.uk>");
MODULE_DESCRIPTION("'cpufreq_conservative' - A dynamic cpufreq governor for "
"Low Latency Frequency Transition capable processors "
"optimised for use in a battery environment");
MODULE_LICENSE("GPL");
#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE
struct cpufreq_governor *cpufreq_default_governor(void)
{
return CPU_FREQ_GOV_CONSERVATIVE;
}
fs_initcall(cpufreq_gov_dbs_init);
#else
module_init(cpufreq_gov_dbs_init);
#endif
module_exit(cpufreq_gov_dbs_exit);