mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-27 00:39:20 +07:00
A set of scheduler fixes:
- Address a load balancer regression by making the load balancer use the same logic as the wakeup path to spread tasks in the LLC domain. - Prefer the CPU on which a task run last over the local CPU in the fast wakeup path for asymmetric CPU capacity systems to align with the symmetric case. This ensures more locality and prevents massive migration overhead on those asymetric systems - Fix a memory corruption bug in the scheduler debug code caused by handing a modified buffer pointer to kfree(). -----BEGIN PGP SIGNATURE----- iQJHBAABCgAxFiEEQp8+kY+LLUocC4bMphj1TA10mKEFAl+xJIoTHHRnbHhAbGlu dXRyb25peC5kZQAKCRCmGPVMDXSYofyGD/9rUnLlC1h7jEufVa4yPG94DcEqiXT7 8B/zNRKnOmqQePCYUm+DS8njSFqpF9VjR+5zpos3bgYqwn7DyfV+hpxbbgS9NDh/ qRg5gxhTrR4uMyZN62Fex5JS4bP8mKO7oc0usgV2Ytsg3e4H+9DqYhuaA5GrJAxC J3d1Hv/YBW2Uo+RZpB20aaJr0srN7bswTtPMxeeqo8q3Qh4pFcI+rmA4WphVAgHF jQWaNP4YVTgNjqxy7nBp7zFHlSdRbLohldZFtueYmRo1mjmkyQ34Cg7etfBvN1Uf iVYZLaInr0YPr0qR4FrQ3yI8ln/HESxshs0ARzMReYVT71mV//o5wftE18uCULQB rRu9vYz+LBVhkdgx118jJdNJqyqk6Ca6h9ZLqyBKuckj9a39289bwWiS6D/6W51p gurq58YTb2lRzyCnOVEULXehYRJkDI8EToiWppRVm9gy43OFPNox7n6TvNLW6BLS I8msTVdqDYXXj4U1o4Mf9K5LBKlda+ARuBu87r7kH1BJLxXHnOHcEkmeN8O9k7eu jdWfeDzDDjBjt/TU+X4f4RNjudUZrSPQrrESE5+XhfM4CwqcPXa2M/dGtPekW/ED 9IqxPvwkau+0Ym6gkuanfnmda+JVR/nLvZV0uFuUGd+2xMcRemZbZE6hTUiYvYPY CAHpOhmeakbr6w== =wFcU -----END PGP SIGNATURE----- Merge tag 'sched-urgent-2020-11-15' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip Pull scheduler fixes from Thomas Gleixner: "A set of scheduler fixes: - Address a load balancer regression by making the load balancer use the same logic as the wakeup path to spread tasks in the LLC domain - Prefer the CPU on which a task run last over the local CPU in the fast wakeup path for asymmetric CPU capacity systems to align with the symmetric case. 
This ensures more locality and prevents massive migration overhead on those asymmetric systems - Fix a memory corruption bug in the scheduler debug code caused by handing a modified buffer pointer to kfree()" * tag 'sched-urgent-2020-11-15' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: sched/debug: Fix memory corruption caused by multiple small reads of flags sched/fair: Prefer prev cpu in asymmetric wakeup path sched/fair: Ensure tasks spreading in LLC during LB
This commit is contained in:
commit
d0a37fd57f
@ -251,7 +251,7 @@ static int sd_ctl_doflags(struct ctl_table *table, int write,
|
||||
unsigned long flags = *(unsigned long *)table->data;
|
||||
size_t data_size = 0;
|
||||
size_t len = 0;
|
||||
char *tmp;
|
||||
char *tmp, *buf;
|
||||
int idx;
|
||||
|
||||
if (write)
|
||||
@ -269,17 +269,17 @@ static int sd_ctl_doflags(struct ctl_table *table, int write,
|
||||
return 0;
|
||||
}
|
||||
|
||||
tmp = kcalloc(data_size + 1, sizeof(*tmp), GFP_KERNEL);
|
||||
if (!tmp)
|
||||
buf = kcalloc(data_size + 1, sizeof(*buf), GFP_KERNEL);
|
||||
if (!buf)
|
||||
return -ENOMEM;
|
||||
|
||||
for_each_set_bit(idx, &flags, __SD_FLAG_CNT) {
|
||||
char *name = sd_flag_debug[idx].name;
|
||||
|
||||
len += snprintf(tmp + len, strlen(name) + 2, "%s ", name);
|
||||
len += snprintf(buf + len, strlen(name) + 2, "%s ", name);
|
||||
}
|
||||
|
||||
tmp += *ppos;
|
||||
tmp = buf + *ppos;
|
||||
len -= *ppos;
|
||||
|
||||
if (len > *lenp)
|
||||
@ -294,7 +294,7 @@ static int sd_ctl_doflags(struct ctl_table *table, int write,
|
||||
*lenp = len;
|
||||
*ppos += len;
|
||||
|
||||
kfree(tmp);
|
||||
kfree(buf);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -6172,21 +6172,21 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
|
||||
static int
|
||||
select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target)
|
||||
{
|
||||
unsigned long best_cap = 0;
|
||||
unsigned long task_util, best_cap = 0;
|
||||
int cpu, best_cpu = -1;
|
||||
struct cpumask *cpus;
|
||||
|
||||
sync_entity_load_avg(&p->se);
|
||||
|
||||
cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
|
||||
cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
|
||||
|
||||
task_util = uclamp_task_util(p);
|
||||
|
||||
for_each_cpu_wrap(cpu, cpus, target) {
|
||||
unsigned long cpu_cap = capacity_of(cpu);
|
||||
|
||||
if (!available_idle_cpu(cpu) && !sched_idle_cpu(cpu))
|
||||
continue;
|
||||
if (task_fits_capacity(p, cpu_cap))
|
||||
if (fits_capacity(task_util, cpu_cap))
|
||||
return cpu;
|
||||
|
||||
if (cpu_cap > best_cap) {
|
||||
@ -6198,44 +6198,42 @@ select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target)
|
||||
return best_cpu;
|
||||
}
|
||||
|
||||
static inline bool asym_fits_capacity(int task_util, int cpu)
|
||||
{
|
||||
if (static_branch_unlikely(&sched_asym_cpucapacity))
|
||||
return fits_capacity(task_util, capacity_of(cpu));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Try and locate an idle core/thread in the LLC cache domain.
|
||||
*/
|
||||
static int select_idle_sibling(struct task_struct *p, int prev, int target)
|
||||
{
|
||||
struct sched_domain *sd;
|
||||
unsigned long task_util;
|
||||
int i, recent_used_cpu;
|
||||
|
||||
/*
|
||||
* For asymmetric CPU capacity systems, our domain of interest is
|
||||
* sd_asym_cpucapacity rather than sd_llc.
|
||||
* On asymmetric system, update task utilization because we will check
|
||||
* that the task fits with cpu's capacity.
|
||||
*/
|
||||
if (static_branch_unlikely(&sched_asym_cpucapacity)) {
|
||||
sd = rcu_dereference(per_cpu(sd_asym_cpucapacity, target));
|
||||
/*
|
||||
* On an asymmetric CPU capacity system where an exclusive
|
||||
* cpuset defines a symmetric island (i.e. one unique
|
||||
* capacity_orig value through the cpuset), the key will be set
|
||||
* but the CPUs within that cpuset will not have a domain with
|
||||
* SD_ASYM_CPUCAPACITY. These should follow the usual symmetric
|
||||
* capacity path.
|
||||
*/
|
||||
if (!sd)
|
||||
goto symmetric;
|
||||
|
||||
i = select_idle_capacity(p, sd, target);
|
||||
return ((unsigned)i < nr_cpumask_bits) ? i : target;
|
||||
sync_entity_load_avg(&p->se);
|
||||
task_util = uclamp_task_util(p);
|
||||
}
|
||||
|
||||
symmetric:
|
||||
if (available_idle_cpu(target) || sched_idle_cpu(target))
|
||||
if ((available_idle_cpu(target) || sched_idle_cpu(target)) &&
|
||||
asym_fits_capacity(task_util, target))
|
||||
return target;
|
||||
|
||||
/*
|
||||
* If the previous CPU is cache affine and idle, don't be stupid:
|
||||
*/
|
||||
if (prev != target && cpus_share_cache(prev, target) &&
|
||||
(available_idle_cpu(prev) || sched_idle_cpu(prev)))
|
||||
(available_idle_cpu(prev) || sched_idle_cpu(prev)) &&
|
||||
asym_fits_capacity(task_util, prev))
|
||||
return prev;
|
||||
|
||||
/*
|
||||
@ -6258,7 +6256,8 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
|
||||
recent_used_cpu != target &&
|
||||
cpus_share_cache(recent_used_cpu, target) &&
|
||||
(available_idle_cpu(recent_used_cpu) || sched_idle_cpu(recent_used_cpu)) &&
|
||||
cpumask_test_cpu(p->recent_used_cpu, p->cpus_ptr)) {
|
||||
cpumask_test_cpu(p->recent_used_cpu, p->cpus_ptr) &&
|
||||
asym_fits_capacity(task_util, recent_used_cpu)) {
|
||||
/*
|
||||
* Replace recent_used_cpu with prev as it is a potential
|
||||
* candidate for the next wake:
|
||||
@ -6267,6 +6266,26 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
|
||||
return recent_used_cpu;
|
||||
}
|
||||
|
||||
/*
|
||||
* For asymmetric CPU capacity systems, our domain of interest is
|
||||
* sd_asym_cpucapacity rather than sd_llc.
|
||||
*/
|
||||
if (static_branch_unlikely(&sched_asym_cpucapacity)) {
|
||||
sd = rcu_dereference(per_cpu(sd_asym_cpucapacity, target));
|
||||
/*
|
||||
* On an asymmetric CPU capacity system where an exclusive
|
||||
* cpuset defines a symmetric island (i.e. one unique
|
||||
* capacity_orig value through the cpuset), the key will be set
|
||||
* but the CPUs within that cpuset will not have a domain with
|
||||
* SD_ASYM_CPUCAPACITY. These should follow the usual symmetric
|
||||
* capacity path.
|
||||
*/
|
||||
if (sd) {
|
||||
i = select_idle_capacity(p, sd, target);
|
||||
return ((unsigned)i < nr_cpumask_bits) ? i : target;
|
||||
}
|
||||
}
|
||||
|
||||
sd = rcu_dereference(per_cpu(sd_llc, target));
|
||||
if (!sd)
|
||||
return target;
|
||||
@ -9031,7 +9050,8 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
|
||||
* emptying busiest.
|
||||
*/
|
||||
if (local->group_type == group_has_spare) {
|
||||
if (busiest->group_type > group_fully_busy) {
|
||||
if ((busiest->group_type > group_fully_busy) &&
|
||||
!(env->sd->flags & SD_SHARE_PKG_RESOURCES)) {
|
||||
/*
|
||||
* If busiest is overloaded, try to fill spare
|
||||
* capacity. This might end up creating spare capacity
|
||||
|
Loading…
Reference in New Issue
Block a user