mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-15 13:36:43 +07:00
sched: Replace rq::wake_list
The recent commit 90b5363acd ("sched: Clean up scheduler_ipi()")
got its use of smp_call_function_single_async() subtly wrong. Even though
that function will return -EBUSY when trying to re-use a csd, that check
is not atomic, so callers still require external serialization.
The change in ttwu_queue_remote() got this wrong.
While on first reading ttwu_queue_remote() has an atomic test-and-set
that appears to serialize the use, the matching 'release' is not in
the right place to actually guarantee this serialization.
The actual race is against the sched_ttwu_pending() call in the idle loop,
which can process the wake-list without consuming the CSD.
Instead of trying to chain the lists, merge them.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20200526161908.129371594@infradead.org
This commit is contained in:
parent
126c2092e5
commit
a148866489
@ -654,6 +654,7 @@ struct task_struct {
|
|||||||
|
|
||||||
#ifdef CONFIG_SMP
|
#ifdef CONFIG_SMP
|
||||||
struct llist_node wake_entry;
|
struct llist_node wake_entry;
|
||||||
|
unsigned int wake_entry_type;
|
||||||
int on_cpu;
|
int on_cpu;
|
||||||
#ifdef CONFIG_THREAD_INFO_IN_TASK
|
#ifdef CONFIG_THREAD_INFO_IN_TASK
|
||||||
/* Current CPU: */
|
/* Current CPU: */
|
||||||
|
@ -25,6 +25,7 @@ enum {
|
|||||||
CSD_TYPE_ASYNC = 0x00,
|
CSD_TYPE_ASYNC = 0x00,
|
||||||
CSD_TYPE_SYNC = 0x10,
|
CSD_TYPE_SYNC = 0x10,
|
||||||
CSD_TYPE_IRQ_WORK = 0x20,
|
CSD_TYPE_IRQ_WORK = 0x20,
|
||||||
|
CSD_TYPE_TTWU = 0x30,
|
||||||
CSD_FLAG_TYPE_MASK = 0xF0,
|
CSD_FLAG_TYPE_MASK = 0xF0,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -1538,7 +1538,7 @@ static int migration_cpu_stop(void *data)
|
|||||||
* __migrate_task() such that we will not miss enforcing cpus_ptr
|
* __migrate_task() such that we will not miss enforcing cpus_ptr
|
||||||
* during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test.
|
* during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test.
|
||||||
*/
|
*/
|
||||||
sched_ttwu_pending();
|
flush_smp_call_function_from_idle();
|
||||||
|
|
||||||
raw_spin_lock(&p->pi_lock);
|
raw_spin_lock(&p->pi_lock);
|
||||||
rq_lock(rq, &rf);
|
rq_lock(rq, &rf);
|
||||||
@ -2272,14 +2272,13 @@ static int ttwu_remote(struct task_struct *p, int wake_flags)
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_SMP
|
#ifdef CONFIG_SMP
|
||||||
void sched_ttwu_pending(void)
|
void sched_ttwu_pending(void *arg)
|
||||||
{
|
{
|
||||||
|
struct llist_node *llist = arg;
|
||||||
struct rq *rq = this_rq();
|
struct rq *rq = this_rq();
|
||||||
struct llist_node *llist;
|
|
||||||
struct task_struct *p, *t;
|
struct task_struct *p, *t;
|
||||||
struct rq_flags rf;
|
struct rq_flags rf;
|
||||||
|
|
||||||
llist = llist_del_all(&rq->wake_list);
|
|
||||||
if (!llist)
|
if (!llist)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
@ -2299,11 +2298,6 @@ void sched_ttwu_pending(void)
|
|||||||
rq_unlock_irqrestore(rq, &rf);
|
rq_unlock_irqrestore(rq, &rf);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void wake_csd_func(void *info)
|
|
||||||
{
|
|
||||||
sched_ttwu_pending();
|
|
||||||
}
|
|
||||||
|
|
||||||
void send_call_function_single_ipi(int cpu)
|
void send_call_function_single_ipi(int cpu)
|
||||||
{
|
{
|
||||||
struct rq *rq = cpu_rq(cpu);
|
struct rq *rq = cpu_rq(cpu);
|
||||||
@ -2327,12 +2321,7 @@ static void __ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags
|
|||||||
p->sched_remote_wakeup = !!(wake_flags & WF_MIGRATED);
|
p->sched_remote_wakeup = !!(wake_flags & WF_MIGRATED);
|
||||||
|
|
||||||
WRITE_ONCE(rq->ttwu_pending, 1);
|
WRITE_ONCE(rq->ttwu_pending, 1);
|
||||||
if (llist_add(&p->wake_entry, &rq->wake_list)) {
|
__smp_call_single_queue(cpu, &p->wake_entry);
|
||||||
if (!set_nr_if_polling(rq->idle))
|
|
||||||
smp_call_function_single_async(cpu, &rq->wake_csd);
|
|
||||||
else
|
|
||||||
trace_sched_wake_idle_without_ipi(cpu);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void wake_up_if_idle(int cpu)
|
void wake_up_if_idle(int cpu)
|
||||||
@ -2772,6 +2761,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
|
|||||||
p->capture_control = NULL;
|
p->capture_control = NULL;
|
||||||
#endif
|
#endif
|
||||||
init_numa_balancing(clone_flags, p);
|
init_numa_balancing(clone_flags, p);
|
||||||
|
#ifdef CONFIG_SMP
|
||||||
|
p->wake_entry_type = CSD_TYPE_TTWU;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
DEFINE_STATIC_KEY_FALSE(sched_numa_balancing);
|
DEFINE_STATIC_KEY_FALSE(sched_numa_balancing);
|
||||||
@ -6564,7 +6556,6 @@ int sched_cpu_dying(unsigned int cpu)
|
|||||||
struct rq_flags rf;
|
struct rq_flags rf;
|
||||||
|
|
||||||
/* Handle pending wakeups and then migrate everything off */
|
/* Handle pending wakeups and then migrate everything off */
|
||||||
sched_ttwu_pending();
|
|
||||||
sched_tick_stop(cpu);
|
sched_tick_stop(cpu);
|
||||||
|
|
||||||
rq_lock_irqsave(rq, &rf);
|
rq_lock_irqsave(rq, &rf);
|
||||||
@ -6763,8 +6754,6 @@ void __init sched_init(void)
|
|||||||
rq->avg_idle = 2*sysctl_sched_migration_cost;
|
rq->avg_idle = 2*sysctl_sched_migration_cost;
|
||||||
rq->max_idle_balance_cost = sysctl_sched_migration_cost;
|
rq->max_idle_balance_cost = sysctl_sched_migration_cost;
|
||||||
|
|
||||||
rq_csd_init(rq, &rq->wake_csd, wake_csd_func);
|
|
||||||
|
|
||||||
INIT_LIST_HEAD(&rq->cfs_tasks);
|
INIT_LIST_HEAD(&rq->cfs_tasks);
|
||||||
|
|
||||||
rq_attach_root(rq, &def_root_domain);
|
rq_attach_root(rq, &def_root_domain);
|
||||||
|
@ -294,7 +294,6 @@ static void do_idle(void)
|
|||||||
* critical section.
|
* critical section.
|
||||||
*/
|
*/
|
||||||
flush_smp_call_function_from_idle();
|
flush_smp_call_function_from_idle();
|
||||||
sched_ttwu_pending();
|
|
||||||
schedule_idle();
|
schedule_idle();
|
||||||
|
|
||||||
if (unlikely(klp_patch_pending(current)))
|
if (unlikely(klp_patch_pending(current)))
|
||||||
|
@ -1023,11 +1023,6 @@ struct rq {
|
|||||||
unsigned int ttwu_local;
|
unsigned int ttwu_local;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef CONFIG_SMP
|
|
||||||
call_single_data_t wake_csd;
|
|
||||||
struct llist_head wake_list;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef CONFIG_CPU_IDLE
|
#ifdef CONFIG_CPU_IDLE
|
||||||
/* Must be inspected within a rcu lock section */
|
/* Must be inspected within a rcu lock section */
|
||||||
struct cpuidle_state *idle_state;
|
struct cpuidle_state *idle_state;
|
||||||
@ -1371,8 +1366,6 @@ queue_balance_callback(struct rq *rq,
|
|||||||
rq->balance_callback = head;
|
rq->balance_callback = head;
|
||||||
}
|
}
|
||||||
|
|
||||||
extern void sched_ttwu_pending(void);
|
|
||||||
|
|
||||||
#define rcu_dereference_check_sched_domain(p) \
|
#define rcu_dereference_check_sched_domain(p) \
|
||||||
rcu_dereference_check((p), \
|
rcu_dereference_check((p), \
|
||||||
lockdep_is_held(&sched_domains_mutex))
|
lockdep_is_held(&sched_domains_mutex))
|
||||||
@ -1512,7 +1505,6 @@ extern void flush_smp_call_function_from_idle(void);
|
|||||||
|
|
||||||
#else /* !CONFIG_SMP: */
|
#else /* !CONFIG_SMP: */
|
||||||
static inline void flush_smp_call_function_from_idle(void) { }
|
static inline void flush_smp_call_function_from_idle(void) { }
|
||||||
static inline void sched_ttwu_pending(void) { }
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include "stats.h"
|
#include "stats.h"
|
||||||
|
47
kernel/smp.c
47
kernel/smp.c
@ -196,6 +196,7 @@ void generic_smp_call_function_single_interrupt(void)
|
|||||||
flush_smp_call_function_queue(true);
|
flush_smp_call_function_queue(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
extern void sched_ttwu_pending(void *);
|
||||||
extern void irq_work_single(void *);
|
extern void irq_work_single(void *);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -244,6 +245,10 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline)
|
|||||||
csd->func);
|
csd->func);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case CSD_TYPE_TTWU:
|
||||||
|
pr_warn("IPI task-wakeup sent to offline CPU\n");
|
||||||
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
pr_warn("IPI callback, unknown type %d, sent to offline CPU\n",
|
pr_warn("IPI callback, unknown type %d, sent to offline CPU\n",
|
||||||
CSD_TYPE(csd));
|
CSD_TYPE(csd));
|
||||||
@ -275,22 +280,43 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!entry)
|
||||||
|
return;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Second; run all !SYNC callbacks.
|
* Second; run all !SYNC callbacks.
|
||||||
*/
|
*/
|
||||||
|
prev = NULL;
|
||||||
llist_for_each_entry_safe(csd, csd_next, entry, llist) {
|
llist_for_each_entry_safe(csd, csd_next, entry, llist) {
|
||||||
int type = CSD_TYPE(csd);
|
int type = CSD_TYPE(csd);
|
||||||
|
|
||||||
if (type == CSD_TYPE_ASYNC) {
|
if (type != CSD_TYPE_TTWU) {
|
||||||
smp_call_func_t func = csd->func;
|
if (prev) {
|
||||||
void *info = csd->info;
|
prev->next = &csd_next->llist;
|
||||||
|
} else {
|
||||||
|
entry = &csd_next->llist;
|
||||||
|
}
|
||||||
|
|
||||||
csd_unlock(csd);
|
if (type == CSD_TYPE_ASYNC) {
|
||||||
func(info);
|
smp_call_func_t func = csd->func;
|
||||||
} else if (type == CSD_TYPE_IRQ_WORK) {
|
void *info = csd->info;
|
||||||
irq_work_single(csd);
|
|
||||||
|
csd_unlock(csd);
|
||||||
|
func(info);
|
||||||
|
} else if (type == CSD_TYPE_IRQ_WORK) {
|
||||||
|
irq_work_single(csd);
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
prev = &csd->llist;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Third; only CSD_TYPE_TTWU is left, issue those.
|
||||||
|
*/
|
||||||
|
if (entry)
|
||||||
|
sched_ttwu_pending(entry);
|
||||||
}
|
}
|
||||||
|
|
||||||
void flush_smp_call_function_from_idle(void)
|
void flush_smp_call_function_from_idle(void)
|
||||||
@ -659,6 +685,13 @@ void __init smp_init(void)
|
|||||||
BUILD_BUG_ON(offsetof(struct irq_work, flags) !=
|
BUILD_BUG_ON(offsetof(struct irq_work, flags) !=
|
||||||
offsetof(struct __call_single_data, flags));
|
offsetof(struct __call_single_data, flags));
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Assert the CSD_TYPE_TTWU layout is similar enough
|
||||||
|
* for task_struct to be on the @call_single_queue.
|
||||||
|
*/
|
||||||
|
BUILD_BUG_ON(offsetof(struct task_struct, wake_entry_type) - offsetof(struct task_struct, wake_entry) !=
|
||||||
|
offsetof(struct __call_single_data, flags) - offsetof(struct __call_single_data, llist));
|
||||||
|
|
||||||
idle_threads_init();
|
idle_threads_init();
|
||||||
cpuhp_threads_init();
|
cpuhp_threads_init();
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user