mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-18 12:16:19 +07:00
Merge branch 'cpu_stop' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/misc into sched/core
This commit is contained in:
commit
e7858f52a5
@ -182,16 +182,6 @@ Similarly, sched_expedited RCU provides the following:
|
||||
sched_expedited-torture: Reader Pipe: 12660320201 95875 0 0 0 0 0 0 0 0 0
|
||||
sched_expedited-torture: Reader Batch: 12660424885 0 0 0 0 0 0 0 0 0 0
|
||||
sched_expedited-torture: Free-Block Circulation: 1090795 1090795 1090794 1090793 1090792 1090791 1090790 1090789 1090788 1090787 0
|
||||
state: -1 / 0:0 3:0 4:0
|
||||
|
||||
As before, the first four lines are similar to those for RCU.
|
||||
The last line shows the task-migration state. The first number is
|
||||
-1 if synchronize_sched_expedited() is idle, -2 if in the process of
|
||||
posting wakeups to the migration kthreads, and N when waiting on CPU N.
|
||||
Each of the colon-separated fields following the "/" is a CPU:state pair.
|
||||
Valid states are "0" for idle, "1" for waiting for quiescent state,
|
||||
"2" for passed through quiescent state, and "3" when a race with a
|
||||
CPU-hotplug event forces use of the synchronize_sched() primitive.
|
||||
|
||||
|
||||
USAGE
|
||||
|
@ -391,7 +391,6 @@ static void __init time_init_wq(void)
|
||||
if (time_sync_wq)
|
||||
return;
|
||||
time_sync_wq = create_singlethread_workqueue("timesync");
|
||||
stop_machine_create();
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -80,12 +80,6 @@ static void do_suspend(void)
|
||||
|
||||
shutting_down = SHUTDOWN_SUSPEND;
|
||||
|
||||
err = stop_machine_create();
|
||||
if (err) {
|
||||
printk(KERN_ERR "xen suspend: failed to setup stop_machine %d\n", err);
|
||||
goto out;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PREEMPT
|
||||
/* If the kernel is preemptible, we need to freeze all the processes
|
||||
to prevent them from being in the middle of a pagetable update
|
||||
@ -93,7 +87,7 @@ static void do_suspend(void)
|
||||
err = freeze_processes();
|
||||
if (err) {
|
||||
printk(KERN_ERR "xen suspend: freeze failed %d\n", err);
|
||||
goto out_destroy_sm;
|
||||
goto out;
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -136,12 +130,8 @@ static void do_suspend(void)
|
||||
out_thaw:
|
||||
#ifdef CONFIG_PREEMPT
|
||||
thaw_processes();
|
||||
|
||||
out_destroy_sm:
|
||||
#endif
|
||||
stop_machine_destroy();
|
||||
|
||||
out:
|
||||
#endif
|
||||
shutting_down = SHUTDOWN_INVALID;
|
||||
}
|
||||
#endif /* CONFIG_PM_SLEEP */
|
||||
|
@ -60,8 +60,6 @@ static inline long rcu_batches_completed_bh(void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern int rcu_expedited_torture_stats(char *page);
|
||||
|
||||
static inline void rcu_force_quiescent_state(void)
|
||||
{
|
||||
}
|
||||
|
@ -35,7 +35,6 @@ struct notifier_block;
|
||||
extern void rcu_sched_qs(int cpu);
|
||||
extern void rcu_bh_qs(int cpu);
|
||||
extern int rcu_needs_cpu(int cpu);
|
||||
extern int rcu_expedited_torture_stats(char *page);
|
||||
|
||||
#ifdef CONFIG_TREE_PREEMPT_RCU
|
||||
|
||||
|
@ -1,13 +1,101 @@
|
||||
#ifndef _LINUX_STOP_MACHINE
|
||||
#define _LINUX_STOP_MACHINE
|
||||
/* "Bogolock": stop the entire machine, disable interrupts. This is a
|
||||
very heavy lock, which is equivalent to grabbing every spinlock
|
||||
(and more). So the "read" side to such a lock is anything which
|
||||
disables preeempt. */
|
||||
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/cpumask.h>
|
||||
#include <linux/list.h>
|
||||
#include <asm/system.h>
|
||||
|
||||
/*
|
||||
* stop_cpu[s]() is simplistic per-cpu maximum priority cpu
|
||||
* monopolization mechanism. The caller can specify a non-sleeping
|
||||
* function to be executed on a single or multiple cpus preempting all
|
||||
* other processes and monopolizing those cpus until it finishes.
|
||||
*
|
||||
* Resources for this mechanism are preallocated when a cpu is brought
|
||||
* up and requests are guaranteed to be served as long as the target
|
||||
* cpus are online.
|
||||
*/
|
||||
typedef int (*cpu_stop_fn_t)(void *arg);
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
|
||||
struct cpu_stop_work {
|
||||
struct list_head list; /* cpu_stopper->works */
|
||||
cpu_stop_fn_t fn;
|
||||
void *arg;
|
||||
struct cpu_stop_done *done;
|
||||
};
|
||||
|
||||
int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg);
|
||||
void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
|
||||
struct cpu_stop_work *work_buf);
|
||||
int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg);
|
||||
int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg);
|
||||
|
||||
#else /* CONFIG_SMP */
|
||||
|
||||
#include <linux/workqueue.h>
|
||||
|
||||
struct cpu_stop_work {
|
||||
struct work_struct work;
|
||||
cpu_stop_fn_t fn;
|
||||
void *arg;
|
||||
};
|
||||
|
||||
static inline int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg)
|
||||
{
|
||||
int ret = -ENOENT;
|
||||
preempt_disable();
|
||||
if (cpu == smp_processor_id())
|
||||
ret = fn(arg);
|
||||
preempt_enable();
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void stop_one_cpu_nowait_workfn(struct work_struct *work)
|
||||
{
|
||||
struct cpu_stop_work *stwork =
|
||||
container_of(work, struct cpu_stop_work, work);
|
||||
preempt_disable();
|
||||
stwork->fn(stwork->arg);
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
static inline void stop_one_cpu_nowait(unsigned int cpu,
|
||||
cpu_stop_fn_t fn, void *arg,
|
||||
struct cpu_stop_work *work_buf)
|
||||
{
|
||||
if (cpu == smp_processor_id()) {
|
||||
INIT_WORK(&work_buf->work, stop_one_cpu_nowait_workfn);
|
||||
work_buf->fn = fn;
|
||||
work_buf->arg = arg;
|
||||
schedule_work(&work_buf->work);
|
||||
}
|
||||
}
|
||||
|
||||
static inline int stop_cpus(const struct cpumask *cpumask,
|
||||
cpu_stop_fn_t fn, void *arg)
|
||||
{
|
||||
if (cpumask_test_cpu(raw_smp_processor_id(), cpumask))
|
||||
return stop_one_cpu(raw_smp_processor_id(), fn, arg);
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
static inline int try_stop_cpus(const struct cpumask *cpumask,
|
||||
cpu_stop_fn_t fn, void *arg)
|
||||
{
|
||||
return stop_cpus(cpumask, fn, arg);
|
||||
}
|
||||
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
/*
|
||||
* stop_machine "Bogolock": stop the entire machine, disable
|
||||
* interrupts. This is a very heavy lock, which is equivalent to
|
||||
* grabbing every spinlock (and more). So the "read" side to such a
|
||||
* lock is anything which disables preeempt.
|
||||
*/
|
||||
#if defined(CONFIG_STOP_MACHINE) && defined(CONFIG_SMP)
|
||||
|
||||
/**
|
||||
@ -36,24 +124,7 @@ int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus);
|
||||
*/
|
||||
int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus);
|
||||
|
||||
/**
|
||||
* stop_machine_create: create all stop_machine threads
|
||||
*
|
||||
* Description: This causes all stop_machine threads to be created before
|
||||
* stop_machine actually gets called. This can be used by subsystems that
|
||||
* need a non failing stop_machine infrastructure.
|
||||
*/
|
||||
int stop_machine_create(void);
|
||||
|
||||
/**
|
||||
* stop_machine_destroy: destroy all stop_machine threads
|
||||
*
|
||||
* Description: This causes all stop_machine threads which were created with
|
||||
* stop_machine_create to be destroyed again.
|
||||
*/
|
||||
void stop_machine_destroy(void);
|
||||
|
||||
#else
|
||||
#else /* CONFIG_STOP_MACHINE && CONFIG_SMP */
|
||||
|
||||
static inline int stop_machine(int (*fn)(void *), void *data,
|
||||
const struct cpumask *cpus)
|
||||
@ -65,8 +136,5 @@ static inline int stop_machine(int (*fn)(void *), void *data,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline int stop_machine_create(void) { return 0; }
|
||||
static inline void stop_machine_destroy(void) { }
|
||||
|
||||
#endif /* CONFIG_SMP */
|
||||
#endif /* _LINUX_STOP_MACHINE */
|
||||
#endif /* CONFIG_STOP_MACHINE && CONFIG_SMP */
|
||||
#endif /* _LINUX_STOP_MACHINE */
|
||||
|
@ -68,7 +68,7 @@ obj-$(CONFIG_USER_NS) += user_namespace.o
|
||||
obj-$(CONFIG_PID_NS) += pid_namespace.o
|
||||
obj-$(CONFIG_IKCONFIG) += configs.o
|
||||
obj-$(CONFIG_RESOURCE_COUNTERS) += res_counter.o
|
||||
obj-$(CONFIG_STOP_MACHINE) += stop_machine.o
|
||||
obj-$(CONFIG_SMP) += stop_machine.o
|
||||
obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o
|
||||
obj-$(CONFIG_AUDIT) += audit.o auditfilter.o audit_watch.o
|
||||
obj-$(CONFIG_AUDITSYSCALL) += auditsc.o
|
||||
|
@ -266,9 +266,6 @@ int __ref cpu_down(unsigned int cpu)
|
||||
{
|
||||
int err;
|
||||
|
||||
err = stop_machine_create();
|
||||
if (err)
|
||||
return err;
|
||||
cpu_maps_update_begin();
|
||||
|
||||
if (cpu_hotplug_disabled) {
|
||||
@ -280,7 +277,6 @@ int __ref cpu_down(unsigned int cpu)
|
||||
|
||||
out:
|
||||
cpu_maps_update_done();
|
||||
stop_machine_destroy();
|
||||
return err;
|
||||
}
|
||||
EXPORT_SYMBOL(cpu_down);
|
||||
@ -361,9 +357,6 @@ int disable_nonboot_cpus(void)
|
||||
{
|
||||
int cpu, first_cpu, error;
|
||||
|
||||
error = stop_machine_create();
|
||||
if (error)
|
||||
return error;
|
||||
cpu_maps_update_begin();
|
||||
first_cpu = cpumask_first(cpu_online_mask);
|
||||
/*
|
||||
@ -394,7 +387,6 @@ int disable_nonboot_cpus(void)
|
||||
printk(KERN_ERR "Non-boot CPUs are not disabled\n");
|
||||
}
|
||||
cpu_maps_update_done();
|
||||
stop_machine_destroy();
|
||||
return error;
|
||||
}
|
||||
|
||||
|
@ -723,16 +723,8 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
|
||||
return -EFAULT;
|
||||
name[MODULE_NAME_LEN-1] = '\0';
|
||||
|
||||
/* Create stop_machine threads since free_module relies on
|
||||
* a non-failing stop_machine call. */
|
||||
ret = stop_machine_create();
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (mutex_lock_interruptible(&module_mutex) != 0) {
|
||||
ret = -EINTR;
|
||||
goto out_stop;
|
||||
}
|
||||
if (mutex_lock_interruptible(&module_mutex) != 0)
|
||||
return -EINTR;
|
||||
|
||||
mod = find_module(name);
|
||||
if (!mod) {
|
||||
@ -792,8 +784,6 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
|
||||
|
||||
out:
|
||||
mutex_unlock(&module_mutex);
|
||||
out_stop:
|
||||
stop_machine_destroy();
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -669,7 +669,7 @@ static struct rcu_torture_ops sched_expedited_ops = {
|
||||
.sync = synchronize_sched_expedited,
|
||||
.cb_barrier = NULL,
|
||||
.fqs = rcu_sched_force_quiescent_state,
|
||||
.stats = rcu_expedited_torture_stats,
|
||||
.stats = NULL,
|
||||
.irq_capable = 1,
|
||||
.name = "sched_expedited"
|
||||
};
|
||||
|
285
kernel/sched.c
285
kernel/sched.c
@ -55,9 +55,9 @@
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/cpuset.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/kthread.h>
|
||||
#include <linux/proc_fs.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/stop_machine.h>
|
||||
#include <linux/sysctl.h>
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/times.h>
|
||||
@ -539,15 +539,13 @@ struct rq {
|
||||
int post_schedule;
|
||||
int active_balance;
|
||||
int push_cpu;
|
||||
struct cpu_stop_work active_balance_work;
|
||||
/* cpu of this runqueue: */
|
||||
int cpu;
|
||||
int online;
|
||||
|
||||
unsigned long avg_load_per_task;
|
||||
|
||||
struct task_struct *migration_thread;
|
||||
struct list_head migration_queue;
|
||||
|
||||
u64 rt_avg;
|
||||
u64 age_stamp;
|
||||
u64 idle_stamp;
|
||||
@ -2037,21 +2035,18 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
|
||||
__set_task_cpu(p, new_cpu);
|
||||
}
|
||||
|
||||
struct migration_req {
|
||||
struct list_head list;
|
||||
|
||||
struct migration_arg {
|
||||
struct task_struct *task;
|
||||
int dest_cpu;
|
||||
|
||||
struct completion done;
|
||||
};
|
||||
|
||||
static int migration_cpu_stop(void *data);
|
||||
|
||||
/*
|
||||
* The task's runqueue lock must be held.
|
||||
* Returns true if you have to wait for migration thread.
|
||||
*/
|
||||
static int
|
||||
migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req)
|
||||
static bool migrate_task(struct task_struct *p, int dest_cpu)
|
||||
{
|
||||
struct rq *rq = task_rq(p);
|
||||
|
||||
@ -2059,15 +2054,7 @@ migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req)
|
||||
* If the task is not on a runqueue (and not running), then
|
||||
* the next wake-up will properly place the task.
|
||||
*/
|
||||
if (!p->se.on_rq && !task_running(rq, p))
|
||||
return 0;
|
||||
|
||||
init_completion(&req->done);
|
||||
req->task = p;
|
||||
req->dest_cpu = dest_cpu;
|
||||
list_add(&req->list, &rq->migration_queue);
|
||||
|
||||
return 1;
|
||||
return p->se.on_rq || task_running(rq, p);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -3110,7 +3097,6 @@ static void update_cpu_load(struct rq *this_rq)
|
||||
void sched_exec(void)
|
||||
{
|
||||
struct task_struct *p = current;
|
||||
struct migration_req req;
|
||||
unsigned long flags;
|
||||
struct rq *rq;
|
||||
int dest_cpu;
|
||||
@ -3124,17 +3110,11 @@ void sched_exec(void)
|
||||
* select_task_rq() can race against ->cpus_allowed
|
||||
*/
|
||||
if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed) &&
|
||||
likely(cpu_active(dest_cpu)) &&
|
||||
migrate_task(p, dest_cpu, &req)) {
|
||||
/* Need to wait for migration thread (might exit: take ref). */
|
||||
struct task_struct *mt = rq->migration_thread;
|
||||
likely(cpu_active(dest_cpu)) && migrate_task(p, dest_cpu)) {
|
||||
struct migration_arg arg = { p, dest_cpu };
|
||||
|
||||
get_task_struct(mt);
|
||||
task_rq_unlock(rq, &flags);
|
||||
wake_up_process(mt);
|
||||
put_task_struct(mt);
|
||||
wait_for_completion(&req.done);
|
||||
|
||||
stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
|
||||
return;
|
||||
}
|
||||
unlock:
|
||||
@ -5290,17 +5270,15 @@ static inline void sched_init_granularity(void)
|
||||
/*
|
||||
* This is how migration works:
|
||||
*
|
||||
* 1) we queue a struct migration_req structure in the source CPU's
|
||||
* runqueue and wake up that CPU's migration thread.
|
||||
* 2) we down() the locked semaphore => thread blocks.
|
||||
* 3) migration thread wakes up (implicitly it forces the migrated
|
||||
* thread off the CPU)
|
||||
* 4) it gets the migration request and checks whether the migrated
|
||||
* task is still in the wrong runqueue.
|
||||
* 5) if it's in the wrong runqueue then the migration thread removes
|
||||
* 1) we invoke migration_cpu_stop() on the target CPU using
|
||||
* stop_one_cpu().
|
||||
* 2) stopper starts to run (implicitly forcing the migrated thread
|
||||
* off the CPU)
|
||||
* 3) it checks whether the migrated task is still in the wrong runqueue.
|
||||
* 4) if it's in the wrong runqueue then the migration thread removes
|
||||
* it and puts it into the right queue.
|
||||
* 6) migration thread up()s the semaphore.
|
||||
* 7) we wake up and the migration is done.
|
||||
* 5) stopper completes and stop_one_cpu() returns and the migration
|
||||
* is done.
|
||||
*/
|
||||
|
||||
/*
|
||||
@ -5314,9 +5292,9 @@ static inline void sched_init_granularity(void)
|
||||
*/
|
||||
int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
|
||||
{
|
||||
struct migration_req req;
|
||||
unsigned long flags;
|
||||
struct rq *rq;
|
||||
unsigned int dest_cpu;
|
||||
int ret = 0;
|
||||
|
||||
/*
|
||||
@ -5354,15 +5332,12 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
|
||||
if (cpumask_test_cpu(task_cpu(p), new_mask))
|
||||
goto out;
|
||||
|
||||
if (migrate_task(p, cpumask_any_and(cpu_active_mask, new_mask), &req)) {
|
||||
dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
|
||||
if (migrate_task(p, dest_cpu)) {
|
||||
struct migration_arg arg = { p, dest_cpu };
|
||||
/* Need help from migration thread: drop lock and wait. */
|
||||
struct task_struct *mt = rq->migration_thread;
|
||||
|
||||
get_task_struct(mt);
|
||||
task_rq_unlock(rq, &flags);
|
||||
wake_up_process(mt);
|
||||
put_task_struct(mt);
|
||||
wait_for_completion(&req.done);
|
||||
stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
|
||||
tlb_migrate_finish(p->mm);
|
||||
return 0;
|
||||
}
|
||||
@ -5420,70 +5395,22 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
|
||||
return ret;
|
||||
}
|
||||
|
||||
#define RCU_MIGRATION_IDLE 0
|
||||
#define RCU_MIGRATION_NEED_QS 1
|
||||
#define RCU_MIGRATION_GOT_QS 2
|
||||
#define RCU_MIGRATION_MUST_SYNC 3
|
||||
|
||||
/*
|
||||
* migration_thread - this is a highprio system thread that performs
|
||||
* thread migration by bumping thread off CPU then 'pushing' onto
|
||||
* another runqueue.
|
||||
* migration_cpu_stop - this will be executed by a highprio stopper thread
|
||||
* and performs thread migration by bumping thread off CPU then
|
||||
* 'pushing' onto another runqueue.
|
||||
*/
|
||||
static int migration_thread(void *data)
|
||||
static int migration_cpu_stop(void *data)
|
||||
{
|
||||
int badcpu;
|
||||
int cpu = (long)data;
|
||||
struct rq *rq;
|
||||
|
||||
rq = cpu_rq(cpu);
|
||||
BUG_ON(rq->migration_thread != current);
|
||||
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
while (!kthread_should_stop()) {
|
||||
struct migration_req *req;
|
||||
struct list_head *head;
|
||||
|
||||
raw_spin_lock_irq(&rq->lock);
|
||||
|
||||
if (cpu_is_offline(cpu)) {
|
||||
raw_spin_unlock_irq(&rq->lock);
|
||||
break;
|
||||
}
|
||||
|
||||
if (rq->active_balance) {
|
||||
active_load_balance(rq, cpu);
|
||||
rq->active_balance = 0;
|
||||
}
|
||||
|
||||
head = &rq->migration_queue;
|
||||
|
||||
if (list_empty(head)) {
|
||||
raw_spin_unlock_irq(&rq->lock);
|
||||
schedule();
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
continue;
|
||||
}
|
||||
req = list_entry(head->next, struct migration_req, list);
|
||||
list_del_init(head->next);
|
||||
|
||||
if (req->task != NULL) {
|
||||
raw_spin_unlock(&rq->lock);
|
||||
__migrate_task(req->task, cpu, req->dest_cpu);
|
||||
} else if (likely(cpu == (badcpu = smp_processor_id()))) {
|
||||
req->dest_cpu = RCU_MIGRATION_GOT_QS;
|
||||
raw_spin_unlock(&rq->lock);
|
||||
} else {
|
||||
req->dest_cpu = RCU_MIGRATION_MUST_SYNC;
|
||||
raw_spin_unlock(&rq->lock);
|
||||
WARN_ONCE(1, "migration_thread() on CPU %d, expected %d\n", badcpu, cpu);
|
||||
}
|
||||
local_irq_enable();
|
||||
|
||||
complete(&req->done);
|
||||
}
|
||||
__set_current_state(TASK_RUNNING);
|
||||
struct migration_arg *arg = data;
|
||||
|
||||
/*
|
||||
* The original target cpu might have gone down and we might
|
||||
* be on another cpu but it doesn't matter.
|
||||
*/
|
||||
local_irq_disable();
|
||||
__migrate_task(arg->task, raw_smp_processor_id(), arg->dest_cpu);
|
||||
local_irq_enable();
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -5850,35 +5777,20 @@ static void set_rq_offline(struct rq *rq)
|
||||
static int __cpuinit
|
||||
migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
|
||||
{
|
||||
struct task_struct *p;
|
||||
int cpu = (long)hcpu;
|
||||
unsigned long flags;
|
||||
struct rq *rq;
|
||||
struct rq *rq = cpu_rq(cpu);
|
||||
|
||||
switch (action) {
|
||||
|
||||
case CPU_UP_PREPARE:
|
||||
case CPU_UP_PREPARE_FROZEN:
|
||||
p = kthread_create(migration_thread, hcpu, "migration/%d", cpu);
|
||||
if (IS_ERR(p))
|
||||
return NOTIFY_BAD;
|
||||
kthread_bind(p, cpu);
|
||||
/* Must be high prio: stop_machine expects to yield to it. */
|
||||
rq = task_rq_lock(p, &flags);
|
||||
__setscheduler(rq, p, SCHED_FIFO, MAX_RT_PRIO-1);
|
||||
task_rq_unlock(rq, &flags);
|
||||
get_task_struct(p);
|
||||
cpu_rq(cpu)->migration_thread = p;
|
||||
rq->calc_load_update = calc_load_update;
|
||||
break;
|
||||
|
||||
case CPU_ONLINE:
|
||||
case CPU_ONLINE_FROZEN:
|
||||
/* Strictly unnecessary, as first user will wake it. */
|
||||
wake_up_process(cpu_rq(cpu)->migration_thread);
|
||||
|
||||
/* Update our root-domain */
|
||||
rq = cpu_rq(cpu);
|
||||
raw_spin_lock_irqsave(&rq->lock, flags);
|
||||
if (rq->rd) {
|
||||
BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
|
||||
@ -5889,25 +5801,9 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
|
||||
break;
|
||||
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
case CPU_UP_CANCELED:
|
||||
case CPU_UP_CANCELED_FROZEN:
|
||||
if (!cpu_rq(cpu)->migration_thread)
|
||||
break;
|
||||
/* Unbind it from offline cpu so it can run. Fall thru. */
|
||||
kthread_bind(cpu_rq(cpu)->migration_thread,
|
||||
cpumask_any(cpu_online_mask));
|
||||
kthread_stop(cpu_rq(cpu)->migration_thread);
|
||||
put_task_struct(cpu_rq(cpu)->migration_thread);
|
||||
cpu_rq(cpu)->migration_thread = NULL;
|
||||
break;
|
||||
|
||||
case CPU_DEAD:
|
||||
case CPU_DEAD_FROZEN:
|
||||
migrate_live_tasks(cpu);
|
||||
rq = cpu_rq(cpu);
|
||||
kthread_stop(rq->migration_thread);
|
||||
put_task_struct(rq->migration_thread);
|
||||
rq->migration_thread = NULL;
|
||||
/* Idle task back to normal (off runqueue, low prio) */
|
||||
raw_spin_lock_irq(&rq->lock);
|
||||
deactivate_task(rq, rq->idle, 0);
|
||||
@ -5918,29 +5814,11 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
|
||||
migrate_nr_uninterruptible(rq);
|
||||
BUG_ON(rq->nr_running != 0);
|
||||
calc_global_load_remove(rq);
|
||||
/*
|
||||
* No need to migrate the tasks: it was best-effort if
|
||||
* they didn't take sched_hotcpu_mutex. Just wake up
|
||||
* the requestors.
|
||||
*/
|
||||
raw_spin_lock_irq(&rq->lock);
|
||||
while (!list_empty(&rq->migration_queue)) {
|
||||
struct migration_req *req;
|
||||
|
||||
req = list_entry(rq->migration_queue.next,
|
||||
struct migration_req, list);
|
||||
list_del_init(&req->list);
|
||||
raw_spin_unlock_irq(&rq->lock);
|
||||
complete(&req->done);
|
||||
raw_spin_lock_irq(&rq->lock);
|
||||
}
|
||||
raw_spin_unlock_irq(&rq->lock);
|
||||
break;
|
||||
|
||||
case CPU_DYING:
|
||||
case CPU_DYING_FROZEN:
|
||||
/* Update our root-domain */
|
||||
rq = cpu_rq(cpu);
|
||||
raw_spin_lock_irqsave(&rq->lock, flags);
|
||||
if (rq->rd) {
|
||||
BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
|
||||
@ -7757,10 +7635,8 @@ void __init sched_init(void)
|
||||
rq->push_cpu = 0;
|
||||
rq->cpu = i;
|
||||
rq->online = 0;
|
||||
rq->migration_thread = NULL;
|
||||
rq->idle_stamp = 0;
|
||||
rq->avg_idle = 2*sysctl_sched_migration_cost;
|
||||
INIT_LIST_HEAD(&rq->migration_queue);
|
||||
rq_attach_root(rq, &def_root_domain);
|
||||
#endif
|
||||
init_rq_hrtick(rq);
|
||||
@ -9054,43 +8930,32 @@ struct cgroup_subsys cpuacct_subsys = {
|
||||
|
||||
#ifndef CONFIG_SMP
|
||||
|
||||
int rcu_expedited_torture_stats(char *page)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rcu_expedited_torture_stats);
|
||||
|
||||
void synchronize_sched_expedited(void)
|
||||
{
|
||||
barrier();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
|
||||
|
||||
#else /* #ifndef CONFIG_SMP */
|
||||
|
||||
static DEFINE_PER_CPU(struct migration_req, rcu_migration_req);
|
||||
static DEFINE_MUTEX(rcu_sched_expedited_mutex);
|
||||
static atomic_t synchronize_sched_expedited_count = ATOMIC_INIT(0);
|
||||
|
||||
#define RCU_EXPEDITED_STATE_POST -2
|
||||
#define RCU_EXPEDITED_STATE_IDLE -1
|
||||
|
||||
static int rcu_expedited_state = RCU_EXPEDITED_STATE_IDLE;
|
||||
|
||||
int rcu_expedited_torture_stats(char *page)
|
||||
static int synchronize_sched_expedited_cpu_stop(void *data)
|
||||
{
|
||||
int cnt = 0;
|
||||
int cpu;
|
||||
|
||||
cnt += sprintf(&page[cnt], "state: %d /", rcu_expedited_state);
|
||||
for_each_online_cpu(cpu) {
|
||||
cnt += sprintf(&page[cnt], " %d:%d",
|
||||
cpu, per_cpu(rcu_migration_req, cpu).dest_cpu);
|
||||
}
|
||||
cnt += sprintf(&page[cnt], "\n");
|
||||
return cnt;
|
||||
/*
|
||||
* There must be a full memory barrier on each affected CPU
|
||||
* between the time that try_stop_cpus() is called and the
|
||||
* time that it returns.
|
||||
*
|
||||
* In the current initial implementation of cpu_stop, the
|
||||
* above condition is already met when the control reaches
|
||||
* this point and the following smp_mb() is not strictly
|
||||
* necessary. Do smp_mb() anyway for documentation and
|
||||
* robustness against future implementation changes.
|
||||
*/
|
||||
smp_mb(); /* See above comment block. */
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rcu_expedited_torture_stats);
|
||||
|
||||
static long synchronize_sched_expedited_count;
|
||||
|
||||
/*
|
||||
* Wait for an rcu-sched grace period to elapse, but use "big hammer"
|
||||
@ -9104,18 +8969,14 @@ static long synchronize_sched_expedited_count;
|
||||
*/
|
||||
void synchronize_sched_expedited(void)
|
||||
{
|
||||
int cpu;
|
||||
unsigned long flags;
|
||||
bool need_full_sync = 0;
|
||||
struct rq *rq;
|
||||
struct migration_req *req;
|
||||
long snap;
|
||||
int trycount = 0;
|
||||
int snap, trycount = 0;
|
||||
|
||||
smp_mb(); /* ensure prior mod happens before capturing snap. */
|
||||
snap = ACCESS_ONCE(synchronize_sched_expedited_count) + 1;
|
||||
snap = atomic_read(&synchronize_sched_expedited_count) + 1;
|
||||
get_online_cpus();
|
||||
while (!mutex_trylock(&rcu_sched_expedited_mutex)) {
|
||||
while (try_stop_cpus(cpu_online_mask,
|
||||
synchronize_sched_expedited_cpu_stop,
|
||||
NULL) == -EAGAIN) {
|
||||
put_online_cpus();
|
||||
if (trycount++ < 10)
|
||||
udelay(trycount * num_online_cpus());
|
||||
@ -9123,41 +8984,15 @@ void synchronize_sched_expedited(void)
|
||||
synchronize_sched();
|
||||
return;
|
||||
}
|
||||
if (ACCESS_ONCE(synchronize_sched_expedited_count) - snap > 0) {
|
||||
if (atomic_read(&synchronize_sched_expedited_count) - snap > 0) {
|
||||
smp_mb(); /* ensure test happens before caller kfree */
|
||||
return;
|
||||
}
|
||||
get_online_cpus();
|
||||
}
|
||||
rcu_expedited_state = RCU_EXPEDITED_STATE_POST;
|
||||
for_each_online_cpu(cpu) {
|
||||
rq = cpu_rq(cpu);
|
||||
req = &per_cpu(rcu_migration_req, cpu);
|
||||
init_completion(&req->done);
|
||||
req->task = NULL;
|
||||
req->dest_cpu = RCU_MIGRATION_NEED_QS;
|
||||
raw_spin_lock_irqsave(&rq->lock, flags);
|
||||
list_add(&req->list, &rq->migration_queue);
|
||||
raw_spin_unlock_irqrestore(&rq->lock, flags);
|
||||
wake_up_process(rq->migration_thread);
|
||||
}
|
||||
for_each_online_cpu(cpu) {
|
||||
rcu_expedited_state = cpu;
|
||||
req = &per_cpu(rcu_migration_req, cpu);
|
||||
rq = cpu_rq(cpu);
|
||||
wait_for_completion(&req->done);
|
||||
raw_spin_lock_irqsave(&rq->lock, flags);
|
||||
if (unlikely(req->dest_cpu == RCU_MIGRATION_MUST_SYNC))
|
||||
need_full_sync = 1;
|
||||
req->dest_cpu = RCU_MIGRATION_IDLE;
|
||||
raw_spin_unlock_irqrestore(&rq->lock, flags);
|
||||
}
|
||||
rcu_expedited_state = RCU_EXPEDITED_STATE_IDLE;
|
||||
synchronize_sched_expedited_count++;
|
||||
mutex_unlock(&rcu_sched_expedited_mutex);
|
||||
atomic_inc(&synchronize_sched_expedited_count);
|
||||
smp_mb__after_atomic_inc(); /* ensure post-GP actions seen after GP. */
|
||||
put_online_cpus();
|
||||
if (need_full_sync)
|
||||
synchronize_sched();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
|
||||
|
||||
|
@ -2798,6 +2798,8 @@ static int need_active_balance(struct sched_domain *sd, int sd_idle, int idle)
|
||||
return unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2);
|
||||
}
|
||||
|
||||
static int active_load_balance_cpu_stop(void *data);
|
||||
|
||||
/*
|
||||
* Check this_cpu to ensure it is balanced within domain. Attempt to move
|
||||
* tasks if there is an imbalance.
|
||||
@ -2887,8 +2889,9 @@ static int load_balance(int this_cpu, struct rq *this_rq,
|
||||
if (need_active_balance(sd, sd_idle, idle)) {
|
||||
raw_spin_lock_irqsave(&busiest->lock, flags);
|
||||
|
||||
/* don't kick the migration_thread, if the curr
|
||||
* task on busiest cpu can't be moved to this_cpu
|
||||
/* don't kick the active_load_balance_cpu_stop,
|
||||
* if the curr task on busiest cpu can't be
|
||||
* moved to this_cpu
|
||||
*/
|
||||
if (!cpumask_test_cpu(this_cpu,
|
||||
&busiest->curr->cpus_allowed)) {
|
||||
@ -2898,14 +2901,22 @@ static int load_balance(int this_cpu, struct rq *this_rq,
|
||||
goto out_one_pinned;
|
||||
}
|
||||
|
||||
/*
|
||||
* ->active_balance synchronizes accesses to
|
||||
* ->active_balance_work. Once set, it's cleared
|
||||
* only after active load balance is finished.
|
||||
*/
|
||||
if (!busiest->active_balance) {
|
||||
busiest->active_balance = 1;
|
||||
busiest->push_cpu = this_cpu;
|
||||
active_balance = 1;
|
||||
}
|
||||
raw_spin_unlock_irqrestore(&busiest->lock, flags);
|
||||
|
||||
if (active_balance)
|
||||
wake_up_process(busiest->migration_thread);
|
||||
stop_one_cpu_nowait(cpu_of(busiest),
|
||||
active_load_balance_cpu_stop, busiest,
|
||||
&busiest->active_balance_work);
|
||||
|
||||
/*
|
||||
* We've kicked active balancing, reset the failure
|
||||
@ -3012,24 +3023,29 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
|
||||
}
|
||||
|
||||
/*
|
||||
* active_load_balance is run by migration threads. It pushes running tasks
|
||||
* off the busiest CPU onto idle CPUs. It requires at least 1 task to be
|
||||
* running on each physical CPU where possible, and avoids physical /
|
||||
* logical imbalances.
|
||||
*
|
||||
* Called with busiest_rq locked.
|
||||
* active_load_balance_cpu_stop is run by cpu stopper. It pushes
|
||||
* running tasks off the busiest CPU onto idle CPUs. It requires at
|
||||
* least 1 task to be running on each physical CPU where possible, and
|
||||
* avoids physical / logical imbalances.
|
||||
*/
|
||||
static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
|
||||
static int active_load_balance_cpu_stop(void *data)
|
||||
{
|
||||
struct rq *busiest_rq = data;
|
||||
int busiest_cpu = cpu_of(busiest_rq);
|
||||
int target_cpu = busiest_rq->push_cpu;
|
||||
struct rq *target_rq = cpu_rq(target_cpu);
|
||||
struct sched_domain *sd;
|
||||
struct rq *target_rq;
|
||||
|
||||
raw_spin_lock_irq(&busiest_rq->lock);
|
||||
|
||||
/* make sure the requested cpu hasn't gone down in the meantime */
|
||||
if (unlikely(busiest_cpu != smp_processor_id() ||
|
||||
!busiest_rq->active_balance))
|
||||
goto out_unlock;
|
||||
|
||||
/* Is there any task to move? */
|
||||
if (busiest_rq->nr_running <= 1)
|
||||
return;
|
||||
|
||||
target_rq = cpu_rq(target_cpu);
|
||||
goto out_unlock;
|
||||
|
||||
/*
|
||||
* This condition is "impossible", if it occurs
|
||||
@ -3058,6 +3074,10 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
|
||||
schedstat_inc(sd, alb_failed);
|
||||
}
|
||||
double_unlock_balance(busiest_rq, target_rq);
|
||||
out_unlock:
|
||||
busiest_rq->active_balance = 0;
|
||||
raw_spin_unlock_irq(&busiest_rq->lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NO_HZ
|
||||
|
@ -1,17 +1,381 @@
|
||||
/* Copyright 2008, 2005 Rusty Russell rusty@rustcorp.com.au IBM Corporation.
|
||||
* GPL v2 and any later version.
|
||||
/*
|
||||
* kernel/stop_machine.c
|
||||
*
|
||||
* Copyright (C) 2008, 2005 IBM Corporation.
|
||||
* Copyright (C) 2008, 2005 Rusty Russell rusty@rustcorp.com.au
|
||||
* Copyright (C) 2010 SUSE Linux Products GmbH
|
||||
* Copyright (C) 2010 Tejun Heo <tj@kernel.org>
|
||||
*
|
||||
* This file is released under the GPLv2 and any later version.
|
||||
*/
|
||||
#include <linux/completion.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/kthread.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/stop_machine.h>
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/kallsyms.h>
|
||||
|
||||
#include <asm/atomic.h>
|
||||
#include <asm/uaccess.h>
|
||||
|
||||
/*
|
||||
* Structure to determine completion condition and record errors. May
|
||||
* be shared by works on different cpus.
|
||||
*/
|
||||
struct cpu_stop_done {
|
||||
atomic_t nr_todo; /* nr left to execute */
|
||||
bool executed; /* actually executed? */
|
||||
int ret; /* collected return value */
|
||||
struct completion completion; /* fired if nr_todo reaches 0 */
|
||||
};
|
||||
|
||||
/* the actual stopper, one per every possible cpu, enabled on online cpus */
|
||||
struct cpu_stopper {
|
||||
spinlock_t lock;
|
||||
struct list_head works; /* list of pending works */
|
||||
struct task_struct *thread; /* stopper thread */
|
||||
bool enabled; /* is this stopper enabled? */
|
||||
};
|
||||
|
||||
static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper);
|
||||
|
||||
static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo)
|
||||
{
|
||||
memset(done, 0, sizeof(*done));
|
||||
atomic_set(&done->nr_todo, nr_todo);
|
||||
init_completion(&done->completion);
|
||||
}
|
||||
|
||||
/* signal completion unless @done is NULL */
|
||||
static void cpu_stop_signal_done(struct cpu_stop_done *done, bool executed)
|
||||
{
|
||||
if (done) {
|
||||
if (executed)
|
||||
done->executed = true;
|
||||
if (atomic_dec_and_test(&done->nr_todo))
|
||||
complete(&done->completion);
|
||||
}
|
||||
}
|
||||
|
||||
/* queue @work to @stopper. if offline, @work is completed immediately */
|
||||
static void cpu_stop_queue_work(struct cpu_stopper *stopper,
|
||||
struct cpu_stop_work *work)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&stopper->lock, flags);
|
||||
|
||||
if (stopper->enabled) {
|
||||
list_add_tail(&work->list, &stopper->works);
|
||||
wake_up_process(stopper->thread);
|
||||
} else
|
||||
cpu_stop_signal_done(work->done, false);
|
||||
|
||||
spin_unlock_irqrestore(&stopper->lock, flags);
|
||||
}
|
||||
|
||||
/**
|
||||
* stop_one_cpu - stop a cpu
|
||||
* @cpu: cpu to stop
|
||||
* @fn: function to execute
|
||||
* @arg: argument to @fn
|
||||
*
|
||||
* Execute @fn(@arg) on @cpu. @fn is run in a process context with
|
||||
* the highest priority preempting any task on the cpu and
|
||||
* monopolizing it. This function returns after the execution is
|
||||
* complete.
|
||||
*
|
||||
* This function doesn't guarantee @cpu stays online till @fn
|
||||
* completes. If @cpu goes down in the middle, execution may happen
|
||||
* partially or fully on different cpus. @fn should either be ready
|
||||
* for that or the caller should ensure that @cpu stays online until
|
||||
* this function completes.
|
||||
*
|
||||
* CONTEXT:
|
||||
* Might sleep.
|
||||
*
|
||||
* RETURNS:
|
||||
* -ENOENT if @fn(@arg) was not executed because @cpu was offline;
|
||||
* otherwise, the return value of @fn.
|
||||
*/
|
||||
int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg)
|
||||
{
|
||||
struct cpu_stop_done done;
|
||||
struct cpu_stop_work work = { .fn = fn, .arg = arg, .done = &done };
|
||||
|
||||
cpu_stop_init_done(&done, 1);
|
||||
cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu), &work);
|
||||
wait_for_completion(&done.completion);
|
||||
return done.executed ? done.ret : -ENOENT;
|
||||
}
|
||||
|
||||
/**
|
||||
* stop_one_cpu_nowait - stop a cpu but don't wait for completion
|
||||
* @cpu: cpu to stop
|
||||
* @fn: function to execute
|
||||
* @arg: argument to @fn
|
||||
*
|
||||
* Similar to stop_one_cpu() but doesn't wait for completion. The
|
||||
* caller is responsible for ensuring @work_buf is currently unused
|
||||
* and will remain untouched until stopper starts executing @fn.
|
||||
*
|
||||
* CONTEXT:
|
||||
* Don't care.
|
||||
*/
|
||||
void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
|
||||
struct cpu_stop_work *work_buf)
|
||||
{
|
||||
*work_buf = (struct cpu_stop_work){ .fn = fn, .arg = arg, };
|
||||
cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu), work_buf);
|
||||
}
|
||||
|
||||
/* static data for stop_cpus */
|
||||
static DEFINE_MUTEX(stop_cpus_mutex);
|
||||
static DEFINE_PER_CPU(struct cpu_stop_work, stop_cpus_work);
|
||||
|
||||
int __stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
|
||||
{
|
||||
struct cpu_stop_work *work;
|
||||
struct cpu_stop_done done;
|
||||
unsigned int cpu;
|
||||
|
||||
/* initialize works and done */
|
||||
for_each_cpu(cpu, cpumask) {
|
||||
work = &per_cpu(stop_cpus_work, cpu);
|
||||
work->fn = fn;
|
||||
work->arg = arg;
|
||||
work->done = &done;
|
||||
}
|
||||
cpu_stop_init_done(&done, cpumask_weight(cpumask));
|
||||
|
||||
/*
|
||||
* Disable preemption while queueing to avoid getting
|
||||
* preempted by a stopper which might wait for other stoppers
|
||||
* to enter @fn which can lead to deadlock.
|
||||
*/
|
||||
preempt_disable();
|
||||
for_each_cpu(cpu, cpumask)
|
||||
cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu),
|
||||
&per_cpu(stop_cpus_work, cpu));
|
||||
preempt_enable();
|
||||
|
||||
wait_for_completion(&done.completion);
|
||||
return done.executed ? done.ret : -ENOENT;
|
||||
}
|
||||
|
||||
/**
|
||||
* stop_cpus - stop multiple cpus
|
||||
* @cpumask: cpus to stop
|
||||
* @fn: function to execute
|
||||
* @arg: argument to @fn
|
||||
*
|
||||
* Execute @fn(@arg) on online cpus in @cpumask. On each target cpu,
|
||||
* @fn is run in a process context with the highest priority
|
||||
* preempting any task on the cpu and monopolizing it. This function
|
||||
* returns after all executions are complete.
|
||||
*
|
||||
* This function doesn't guarantee the cpus in @cpumask stay online
|
||||
* till @fn completes. If some cpus go down in the middle, execution
|
||||
* on the cpu may happen partially or fully on different cpus. @fn
|
||||
* should either be ready for that or the caller should ensure that
|
||||
* the cpus stay online until this function completes.
|
||||
*
|
||||
* All stop_cpus() calls are serialized making it safe for @fn to wait
|
||||
* for all cpus to start executing it.
|
||||
*
|
||||
* CONTEXT:
|
||||
* Might sleep.
|
||||
*
|
||||
* RETURNS:
|
||||
* -ENOENT if @fn(@arg) was not executed at all because all cpus in
|
||||
* @cpumask were offline; otherwise, 0 if all executions of @fn
|
||||
* returned 0, any non zero return value if any returned non zero.
|
||||
*/
|
||||
int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
|
||||
{
|
||||
int ret;
|
||||
|
||||
/* static works are used, process one request at a time */
|
||||
mutex_lock(&stop_cpus_mutex);
|
||||
ret = __stop_cpus(cpumask, fn, arg);
|
||||
mutex_unlock(&stop_cpus_mutex);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* try_stop_cpus - try to stop multiple cpus
|
||||
* @cpumask: cpus to stop
|
||||
* @fn: function to execute
|
||||
* @arg: argument to @fn
|
||||
*
|
||||
* Identical to stop_cpus() except that it fails with -EAGAIN if
|
||||
* someone else is already using the facility.
|
||||
*
|
||||
* CONTEXT:
|
||||
* Might sleep.
|
||||
*
|
||||
* RETURNS:
|
||||
* -EAGAIN if someone else is already stopping cpus, -ENOENT if
|
||||
* @fn(@arg) was not executed at all because all cpus in @cpumask were
|
||||
* offline; otherwise, 0 if all executions of @fn returned 0, any non
|
||||
* zero return value if any returned non zero.
|
||||
*/
|
||||
int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
|
||||
{
|
||||
int ret;
|
||||
|
||||
/* static works are used, process one request at a time */
|
||||
if (!mutex_trylock(&stop_cpus_mutex))
|
||||
return -EAGAIN;
|
||||
ret = __stop_cpus(cpumask, fn, arg);
|
||||
mutex_unlock(&stop_cpus_mutex);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int cpu_stopper_thread(void *data)
|
||||
{
|
||||
struct cpu_stopper *stopper = data;
|
||||
struct cpu_stop_work *work;
|
||||
int ret;
|
||||
|
||||
repeat:
|
||||
set_current_state(TASK_INTERRUPTIBLE); /* mb paired w/ kthread_stop */
|
||||
|
||||
if (kthread_should_stop()) {
|
||||
__set_current_state(TASK_RUNNING);
|
||||
return 0;
|
||||
}
|
||||
|
||||
work = NULL;
|
||||
spin_lock_irq(&stopper->lock);
|
||||
if (!list_empty(&stopper->works)) {
|
||||
work = list_first_entry(&stopper->works,
|
||||
struct cpu_stop_work, list);
|
||||
list_del_init(&work->list);
|
||||
}
|
||||
spin_unlock_irq(&stopper->lock);
|
||||
|
||||
if (work) {
|
||||
cpu_stop_fn_t fn = work->fn;
|
||||
void *arg = work->arg;
|
||||
struct cpu_stop_done *done = work->done;
|
||||
char ksym_buf[KSYM_NAME_LEN];
|
||||
|
||||
__set_current_state(TASK_RUNNING);
|
||||
|
||||
/* cpu stop callbacks are not allowed to sleep */
|
||||
preempt_disable();
|
||||
|
||||
ret = fn(arg);
|
||||
if (ret)
|
||||
done->ret = ret;
|
||||
|
||||
/* restore preemption and check it's still balanced */
|
||||
preempt_enable();
|
||||
WARN_ONCE(preempt_count(),
|
||||
"cpu_stop: %s(%p) leaked preempt count\n",
|
||||
kallsyms_lookup((unsigned long)fn, NULL, NULL, NULL,
|
||||
ksym_buf), arg);
|
||||
|
||||
cpu_stop_signal_done(done, true);
|
||||
} else
|
||||
schedule();
|
||||
|
||||
goto repeat;
|
||||
}
|
||||
|
||||
/* manage stopper for a cpu, mostly lifted from sched migration thread mgmt */
|
||||
static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb,
|
||||
unsigned long action, void *hcpu)
|
||||
{
|
||||
struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
|
||||
unsigned int cpu = (unsigned long)hcpu;
|
||||
struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
|
||||
struct cpu_stop_work *work;
|
||||
struct task_struct *p;
|
||||
|
||||
switch (action & ~CPU_TASKS_FROZEN) {
|
||||
case CPU_UP_PREPARE:
|
||||
BUG_ON(stopper->thread || stopper->enabled ||
|
||||
!list_empty(&stopper->works));
|
||||
p = kthread_create(cpu_stopper_thread, stopper, "migration/%d",
|
||||
cpu);
|
||||
if (IS_ERR(p))
|
||||
return NOTIFY_BAD;
|
||||
sched_setscheduler_nocheck(p, SCHED_FIFO, ¶m);
|
||||
get_task_struct(p);
|
||||
stopper->thread = p;
|
||||
break;
|
||||
|
||||
case CPU_ONLINE:
|
||||
kthread_bind(stopper->thread, cpu);
|
||||
/* strictly unnecessary, as first user will wake it */
|
||||
wake_up_process(stopper->thread);
|
||||
/* mark enabled */
|
||||
spin_lock_irq(&stopper->lock);
|
||||
stopper->enabled = true;
|
||||
spin_unlock_irq(&stopper->lock);
|
||||
break;
|
||||
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
case CPU_UP_CANCELED:
|
||||
case CPU_DEAD:
|
||||
/* kill the stopper */
|
||||
kthread_stop(stopper->thread);
|
||||
/* drain remaining works */
|
||||
spin_lock_irq(&stopper->lock);
|
||||
list_for_each_entry(work, &stopper->works, list)
|
||||
cpu_stop_signal_done(work->done, false);
|
||||
stopper->enabled = false;
|
||||
spin_unlock_irq(&stopper->lock);
|
||||
/* release the stopper */
|
||||
put_task_struct(stopper->thread);
|
||||
stopper->thread = NULL;
|
||||
break;
|
||||
#endif
|
||||
}
|
||||
|
||||
return NOTIFY_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
* Give it a higher priority so that cpu stopper is available to other
|
||||
* cpu notifiers. It currently shares the same priority as sched
|
||||
* migration_notifier.
|
||||
*/
|
||||
static struct notifier_block __cpuinitdata cpu_stop_cpu_notifier = {
|
||||
.notifier_call = cpu_stop_cpu_callback,
|
||||
.priority = 10,
|
||||
};
|
||||
|
||||
static int __init cpu_stop_init(void)
|
||||
{
|
||||
void *bcpu = (void *)(long)smp_processor_id();
|
||||
unsigned int cpu;
|
||||
int err;
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
|
||||
|
||||
spin_lock_init(&stopper->lock);
|
||||
INIT_LIST_HEAD(&stopper->works);
|
||||
}
|
||||
|
||||
/* start one for the boot cpu */
|
||||
err = cpu_stop_cpu_callback(&cpu_stop_cpu_notifier, CPU_UP_PREPARE,
|
||||
bcpu);
|
||||
BUG_ON(err == NOTIFY_BAD);
|
||||
cpu_stop_cpu_callback(&cpu_stop_cpu_notifier, CPU_ONLINE, bcpu);
|
||||
register_cpu_notifier(&cpu_stop_cpu_notifier);
|
||||
|
||||
return 0;
|
||||
}
|
||||
early_initcall(cpu_stop_init);
|
||||
|
||||
#ifdef CONFIG_STOP_MACHINE
|
||||
|
||||
/* This controls the threads on each CPU. */
|
||||
enum stopmachine_state {
|
||||
@ -26,174 +390,94 @@ enum stopmachine_state {
|
||||
/* Exit */
|
||||
STOPMACHINE_EXIT,
|
||||
};
|
||||
static enum stopmachine_state state;
|
||||
|
||||
struct stop_machine_data {
|
||||
int (*fn)(void *);
|
||||
void *data;
|
||||
int fnret;
|
||||
int (*fn)(void *);
|
||||
void *data;
|
||||
/* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
|
||||
unsigned int num_threads;
|
||||
const struct cpumask *active_cpus;
|
||||
|
||||
enum stopmachine_state state;
|
||||
atomic_t thread_ack;
|
||||
};
|
||||
|
||||
/* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
|
||||
static unsigned int num_threads;
|
||||
static atomic_t thread_ack;
|
||||
static DEFINE_MUTEX(lock);
|
||||
/* setup_lock protects refcount, stop_machine_wq and stop_machine_work. */
|
||||
static DEFINE_MUTEX(setup_lock);
|
||||
/* Users of stop_machine. */
|
||||
static int refcount;
|
||||
static struct workqueue_struct *stop_machine_wq;
|
||||
static struct stop_machine_data active, idle;
|
||||
static const struct cpumask *active_cpus;
|
||||
static void __percpu *stop_machine_work;
|
||||
|
||||
static void set_state(enum stopmachine_state newstate)
|
||||
static void set_state(struct stop_machine_data *smdata,
|
||||
enum stopmachine_state newstate)
|
||||
{
|
||||
/* Reset ack counter. */
|
||||
atomic_set(&thread_ack, num_threads);
|
||||
atomic_set(&smdata->thread_ack, smdata->num_threads);
|
||||
smp_wmb();
|
||||
state = newstate;
|
||||
smdata->state = newstate;
|
||||
}
|
||||
|
||||
/* Last one to ack a state moves to the next state. */
|
||||
static void ack_state(void)
|
||||
static void ack_state(struct stop_machine_data *smdata)
|
||||
{
|
||||
if (atomic_dec_and_test(&thread_ack))
|
||||
set_state(state + 1);
|
||||
if (atomic_dec_and_test(&smdata->thread_ack))
|
||||
set_state(smdata, smdata->state + 1);
|
||||
}
|
||||
|
||||
/* This is the actual function which stops the CPU. It runs
|
||||
* in the context of a dedicated stopmachine workqueue. */
|
||||
static void stop_cpu(struct work_struct *unused)
|
||||
/* This is the cpu_stop function which stops the CPU. */
|
||||
static int stop_machine_cpu_stop(void *data)
|
||||
{
|
||||
struct stop_machine_data *smdata = data;
|
||||
enum stopmachine_state curstate = STOPMACHINE_NONE;
|
||||
struct stop_machine_data *smdata = &idle;
|
||||
int cpu = smp_processor_id();
|
||||
int err;
|
||||
int cpu = smp_processor_id(), err = 0;
|
||||
bool is_active;
|
||||
|
||||
if (!smdata->active_cpus)
|
||||
is_active = cpu == cpumask_first(cpu_online_mask);
|
||||
else
|
||||
is_active = cpumask_test_cpu(cpu, smdata->active_cpus);
|
||||
|
||||
if (!active_cpus) {
|
||||
if (cpu == cpumask_first(cpu_online_mask))
|
||||
smdata = &active;
|
||||
} else {
|
||||
if (cpumask_test_cpu(cpu, active_cpus))
|
||||
smdata = &active;
|
||||
}
|
||||
/* Simple state machine */
|
||||
do {
|
||||
/* Chill out and ensure we re-read stopmachine_state. */
|
||||
cpu_relax();
|
||||
if (state != curstate) {
|
||||
curstate = state;
|
||||
if (smdata->state != curstate) {
|
||||
curstate = smdata->state;
|
||||
switch (curstate) {
|
||||
case STOPMACHINE_DISABLE_IRQ:
|
||||
local_irq_disable();
|
||||
hard_irq_disable();
|
||||
break;
|
||||
case STOPMACHINE_RUN:
|
||||
/* On multiple CPUs only a single error code
|
||||
* is needed to tell that something failed. */
|
||||
err = smdata->fn(smdata->data);
|
||||
if (err)
|
||||
smdata->fnret = err;
|
||||
if (is_active)
|
||||
err = smdata->fn(smdata->data);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
ack_state();
|
||||
ack_state(smdata);
|
||||
}
|
||||
} while (curstate != STOPMACHINE_EXIT);
|
||||
|
||||
local_irq_enable();
|
||||
return err;
|
||||
}
|
||||
|
||||
/* Callback for CPUs which aren't supposed to do anything. */
|
||||
static int chill(void *unused)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
int stop_machine_create(void)
|
||||
{
|
||||
mutex_lock(&setup_lock);
|
||||
if (refcount)
|
||||
goto done;
|
||||
stop_machine_wq = create_rt_workqueue("kstop");
|
||||
if (!stop_machine_wq)
|
||||
goto err_out;
|
||||
stop_machine_work = alloc_percpu(struct work_struct);
|
||||
if (!stop_machine_work)
|
||||
goto err_out;
|
||||
done:
|
||||
refcount++;
|
||||
mutex_unlock(&setup_lock);
|
||||
return 0;
|
||||
|
||||
err_out:
|
||||
if (stop_machine_wq)
|
||||
destroy_workqueue(stop_machine_wq);
|
||||
mutex_unlock(&setup_lock);
|
||||
return -ENOMEM;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(stop_machine_create);
|
||||
|
||||
void stop_machine_destroy(void)
|
||||
{
|
||||
mutex_lock(&setup_lock);
|
||||
refcount--;
|
||||
if (refcount)
|
||||
goto done;
|
||||
destroy_workqueue(stop_machine_wq);
|
||||
free_percpu(stop_machine_work);
|
||||
done:
|
||||
mutex_unlock(&setup_lock);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(stop_machine_destroy);
|
||||
|
||||
int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
|
||||
{
|
||||
struct work_struct *sm_work;
|
||||
int i, ret;
|
||||
struct stop_machine_data smdata = { .fn = fn, .data = data,
|
||||
.num_threads = num_online_cpus(),
|
||||
.active_cpus = cpus };
|
||||
|
||||
/* Set up initial state. */
|
||||
mutex_lock(&lock);
|
||||
num_threads = num_online_cpus();
|
||||
active_cpus = cpus;
|
||||
active.fn = fn;
|
||||
active.data = data;
|
||||
active.fnret = 0;
|
||||
idle.fn = chill;
|
||||
idle.data = NULL;
|
||||
|
||||
set_state(STOPMACHINE_PREPARE);
|
||||
|
||||
/* Schedule the stop_cpu work on all cpus: hold this CPU so one
|
||||
* doesn't hit this CPU until we're ready. */
|
||||
get_cpu();
|
||||
for_each_online_cpu(i) {
|
||||
sm_work = per_cpu_ptr(stop_machine_work, i);
|
||||
INIT_WORK(sm_work, stop_cpu);
|
||||
queue_work_on(i, stop_machine_wq, sm_work);
|
||||
}
|
||||
/* This will release the thread on our CPU. */
|
||||
put_cpu();
|
||||
flush_workqueue(stop_machine_wq);
|
||||
ret = active.fnret;
|
||||
mutex_unlock(&lock);
|
||||
return ret;
|
||||
/* Set the initial state and stop all online cpus. */
|
||||
set_state(&smdata, STOPMACHINE_PREPARE);
|
||||
return stop_cpus(cpu_online_mask, stop_machine_cpu_stop, &smdata);
|
||||
}
|
||||
|
||||
int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = stop_machine_create();
|
||||
if (ret)
|
||||
return ret;
|
||||
/* No CPUs can come up or down during this. */
|
||||
get_online_cpus();
|
||||
ret = __stop_machine(fn, data, cpus);
|
||||
put_online_cpus();
|
||||
stop_machine_destroy();
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(stop_machine);
|
||||
|
||||
#endif /* CONFIG_STOP_MACHINE */
|
||||
|
Loading…
Reference in New Issue
Block a user