mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-28 11:18:45 +07:00
2a911f0bb7
This patch reduces lock complexity of SPU scheduler, particularly for involuntary preemptive switches. As a result the new code does a better job of mapping the highest priority tasks to SPUs. Lock complexity is reduced by using the system default workqueue to perform involuntary saves. In this way we avoid nasty lock ordering problems that the previous code had. A "minimum timeslice" for SPU contexts is also introduced. The intent here is to avoid thrashing. While the new scheduler does a better job at prioritization it still does nothing for fairness. From: Mark Nutter <mnutter@us.ibm.com> Signed-off-by: Arnd Bergmann <arndb@de.ibm.com> Signed-off-by: Paul Mackerras <paulus@samba.org>
457 lines
10 KiB
C
457 lines
10 KiB
C
/* sched.c - SPU scheduler.
|
|
*
|
|
* Copyright (C) IBM 2005
|
|
* Author: Mark Nutter <mnutter@us.ibm.com>
|
|
*
|
|
* SPU scheduler, based on Linux thread priority. For now use
|
|
* a simple "cooperative" yield model with no preemption. SPU
|
|
* scheduling will eventually be preemptive: When a thread with
|
|
* a higher static priority gets ready to run, then an active SPU
|
|
* context will be preempted and returned to the waitq.
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2, or (at your option)
|
|
* any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software
|
|
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
|
*/
|
|
|
|
#undef DEBUG
|
|
|
|
#include <linux/config.h>
|
|
#include <linux/module.h>
|
|
#include <linux/errno.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/completion.h>
|
|
#include <linux/vmalloc.h>
|
|
#include <linux/smp.h>
|
|
#include <linux/smp_lock.h>
|
|
#include <linux/stddef.h>
|
|
#include <linux/unistd.h>
|
|
|
|
#include <asm/io.h>
|
|
#include <asm/mmu_context.h>
|
|
#include <asm/spu.h>
|
|
#include <asm/spu_csa.h>
|
|
#include "spufs.h"
|
|
|
|
#define SPU_MIN_TIMESLICE (100 * HZ / 1000))
|
|
|
|
#define SPU_BITMAP_SIZE (((MAX_PRIO+BITS_PER_LONG)/BITS_PER_LONG)+1)
|
|
struct spu_prio_array {
|
|
atomic_t nr_blocked;
|
|
unsigned long bitmap[SPU_BITMAP_SIZE];
|
|
wait_queue_head_t waitq[MAX_PRIO];
|
|
};
|
|
|
|
/* spu_runqueue - This is the main runqueue data structure for SPUs. */
|
|
struct spu_runqueue {
|
|
struct semaphore sem;
|
|
unsigned long nr_active;
|
|
unsigned long nr_idle;
|
|
unsigned long nr_switches;
|
|
struct list_head active_list;
|
|
struct list_head idle_list;
|
|
struct spu_prio_array prio;
|
|
};
|
|
|
|
static struct spu_runqueue *spu_runqueues = NULL;
|
|
|
|
static inline struct spu_runqueue *spu_rq(void)
|
|
{
|
|
/* Future: make this a per-NODE array,
|
|
* and use cpu_to_node(smp_processor_id())
|
|
*/
|
|
return spu_runqueues;
|
|
}
|
|
|
|
static inline struct spu *del_idle(struct spu_runqueue *rq)
|
|
{
|
|
struct spu *spu;
|
|
|
|
BUG_ON(rq->nr_idle <= 0);
|
|
BUG_ON(list_empty(&rq->idle_list));
|
|
/* Future: Move SPU out of low-power SRI state. */
|
|
spu = list_entry(rq->idle_list.next, struct spu, sched_list);
|
|
list_del_init(&spu->sched_list);
|
|
rq->nr_idle--;
|
|
return spu;
|
|
}
|
|
|
|
static inline void del_active(struct spu_runqueue *rq, struct spu *spu)
|
|
{
|
|
BUG_ON(rq->nr_active <= 0);
|
|
BUG_ON(list_empty(&rq->active_list));
|
|
list_del_init(&spu->sched_list);
|
|
rq->nr_active--;
|
|
}
|
|
|
|
static inline void add_idle(struct spu_runqueue *rq, struct spu *spu)
|
|
{
|
|
/* Future: Put SPU into low-power SRI state. */
|
|
list_add_tail(&spu->sched_list, &rq->idle_list);
|
|
rq->nr_idle++;
|
|
}
|
|
|
|
static inline void add_active(struct spu_runqueue *rq, struct spu *spu)
|
|
{
|
|
rq->nr_active++;
|
|
rq->nr_switches++;
|
|
list_add_tail(&spu->sched_list, &rq->active_list);
|
|
}
|
|
|
|
static void prio_wakeup(struct spu_runqueue *rq)
|
|
{
|
|
if (atomic_read(&rq->prio.nr_blocked) && rq->nr_idle) {
|
|
int best = sched_find_first_bit(rq->prio.bitmap);
|
|
if (best < MAX_PRIO) {
|
|
wait_queue_head_t *wq = &rq->prio.waitq[best];
|
|
wake_up_interruptible_nr(wq, 1);
|
|
}
|
|
}
|
|
}
|
|
|
|
static void prio_wait(struct spu_runqueue *rq, struct spu_context *ctx,
|
|
u64 flags)
|
|
{
|
|
int prio = current->prio;
|
|
wait_queue_head_t *wq = &rq->prio.waitq[prio];
|
|
DEFINE_WAIT(wait);
|
|
|
|
__set_bit(prio, rq->prio.bitmap);
|
|
atomic_inc(&rq->prio.nr_blocked);
|
|
prepare_to_wait_exclusive(wq, &wait, TASK_INTERRUPTIBLE);
|
|
if (!signal_pending(current)) {
|
|
up(&rq->sem);
|
|
up_write(&ctx->state_sema);
|
|
pr_debug("%s: pid=%d prio=%d\n", __FUNCTION__,
|
|
current->pid, current->prio);
|
|
schedule();
|
|
down_write(&ctx->state_sema);
|
|
down(&rq->sem);
|
|
}
|
|
finish_wait(wq, &wait);
|
|
atomic_dec(&rq->prio.nr_blocked);
|
|
if (!waitqueue_active(wq))
|
|
__clear_bit(prio, rq->prio.bitmap);
|
|
}
|
|
|
|
static inline int is_best_prio(struct spu_runqueue *rq)
|
|
{
|
|
int best_prio;
|
|
|
|
best_prio = sched_find_first_bit(rq->prio.bitmap);
|
|
return (current->prio < best_prio) ? 1 : 0;
|
|
}
|
|
|
|
static inline void mm_needs_global_tlbie(struct mm_struct *mm)
|
|
{
|
|
/* Global TLBIE broadcast required with SPEs. */
|
|
#if (NR_CPUS > 1)
|
|
__cpus_setall(&mm->cpu_vm_mask, NR_CPUS);
|
|
#else
|
|
__cpus_setall(&mm->cpu_vm_mask, NR_CPUS+1); /* is this ok? */
|
|
#endif
|
|
}
|
|
|
|
static inline void bind_context(struct spu *spu, struct spu_context *ctx)
|
|
{
|
|
pr_debug("%s: pid=%d SPU=%d\n", __FUNCTION__, current->pid,
|
|
spu->number);
|
|
spu->ctx = ctx;
|
|
spu->flags = 0;
|
|
ctx->flags = 0;
|
|
ctx->spu = spu;
|
|
ctx->ops = &spu_hw_ops;
|
|
spu->pid = current->pid;
|
|
spu->prio = current->prio;
|
|
spu->mm = ctx->owner;
|
|
mm_needs_global_tlbie(spu->mm);
|
|
spu->ibox_callback = spufs_ibox_callback;
|
|
spu->wbox_callback = spufs_wbox_callback;
|
|
spu->stop_callback = spufs_stop_callback;
|
|
mb();
|
|
spu_unmap_mappings(ctx);
|
|
spu_restore(&ctx->csa, spu);
|
|
spu->timestamp = jiffies;
|
|
}
|
|
|
|
static inline void unbind_context(struct spu *spu, struct spu_context *ctx)
|
|
{
|
|
pr_debug("%s: unbind pid=%d SPU=%d\n", __FUNCTION__,
|
|
spu->pid, spu->number);
|
|
spu_unmap_mappings(ctx);
|
|
spu_save(&ctx->csa, spu);
|
|
spu->timestamp = jiffies;
|
|
ctx->state = SPU_STATE_SAVED;
|
|
spu->ibox_callback = NULL;
|
|
spu->wbox_callback = NULL;
|
|
spu->stop_callback = NULL;
|
|
spu->mm = NULL;
|
|
spu->pid = 0;
|
|
spu->prio = MAX_PRIO;
|
|
ctx->ops = &spu_backing_ops;
|
|
ctx->spu = NULL;
|
|
ctx->flags = 0;
|
|
spu->flags = 0;
|
|
spu->ctx = NULL;
|
|
}
|
|
|
|
static void spu_reaper(void *data)
|
|
{
|
|
struct spu_context *ctx = data;
|
|
struct spu *spu;
|
|
|
|
down_write(&ctx->state_sema);
|
|
spu = ctx->spu;
|
|
if (spu && (ctx->flags & SPU_CONTEXT_PREEMPT)) {
|
|
if (atomic_read(&spu->rq->prio.nr_blocked)) {
|
|
pr_debug("%s: spu=%d\n", __func__, spu->number);
|
|
ctx->ops->runcntl_stop(ctx);
|
|
spu_deactivate(ctx);
|
|
wake_up_all(&ctx->stop_wq);
|
|
} else {
|
|
clear_bit(SPU_CONTEXT_PREEMPT_nr, &ctx->flags);
|
|
}
|
|
}
|
|
up_write(&ctx->state_sema);
|
|
put_spu_context(ctx);
|
|
}
|
|
|
|
static void schedule_spu_reaper(struct spu_runqueue *rq, struct spu *spu)
|
|
{
|
|
struct spu_context *ctx = get_spu_context(spu->ctx);
|
|
unsigned long now = jiffies;
|
|
unsigned long expire = spu->timestamp + SPU_MIN_TIMESLICE;
|
|
|
|
set_bit(SPU_CONTEXT_PREEMPT_nr, &ctx->flags);
|
|
INIT_WORK(&ctx->reap_work, spu_reaper, ctx);
|
|
if (time_after(now, expire))
|
|
schedule_work(&ctx->reap_work);
|
|
else
|
|
schedule_delayed_work(&ctx->reap_work, expire - now);
|
|
}
|
|
|
|
static void check_preempt_active(struct spu_runqueue *rq)
|
|
{
|
|
struct list_head *p;
|
|
struct spu *worst = NULL;
|
|
|
|
list_for_each(p, &rq->active_list) {
|
|
struct spu *spu = list_entry(p, struct spu, sched_list);
|
|
struct spu_context *ctx = spu->ctx;
|
|
if (!(ctx->flags & SPU_CONTEXT_PREEMPT)) {
|
|
if (!worst || (spu->prio > worst->prio)) {
|
|
worst = spu;
|
|
}
|
|
}
|
|
}
|
|
if (worst && (current->prio < worst->prio))
|
|
schedule_spu_reaper(rq, worst);
|
|
}
|
|
|
|
static struct spu *get_idle_spu(struct spu_context *ctx, u64 flags)
|
|
{
|
|
struct spu_runqueue *rq;
|
|
struct spu *spu = NULL;
|
|
|
|
rq = spu_rq();
|
|
down(&rq->sem);
|
|
for (;;) {
|
|
if (rq->nr_idle > 0) {
|
|
if (is_best_prio(rq)) {
|
|
/* Fall through. */
|
|
spu = del_idle(rq);
|
|
break;
|
|
} else {
|
|
prio_wakeup(rq);
|
|
up(&rq->sem);
|
|
yield();
|
|
if (signal_pending(current)) {
|
|
return NULL;
|
|
}
|
|
rq = spu_rq();
|
|
down(&rq->sem);
|
|
continue;
|
|
}
|
|
} else {
|
|
check_preempt_active(rq);
|
|
prio_wait(rq, ctx, flags);
|
|
if (signal_pending(current)) {
|
|
prio_wakeup(rq);
|
|
spu = NULL;
|
|
break;
|
|
}
|
|
continue;
|
|
}
|
|
}
|
|
up(&rq->sem);
|
|
return spu;
|
|
}
|
|
|
|
static void put_idle_spu(struct spu *spu)
|
|
{
|
|
struct spu_runqueue *rq = spu->rq;
|
|
|
|
down(&rq->sem);
|
|
add_idle(rq, spu);
|
|
prio_wakeup(rq);
|
|
up(&rq->sem);
|
|
}
|
|
|
|
static int get_active_spu(struct spu *spu)
|
|
{
|
|
struct spu_runqueue *rq = spu->rq;
|
|
struct list_head *p;
|
|
struct spu *tmp;
|
|
int rc = 0;
|
|
|
|
down(&rq->sem);
|
|
list_for_each(p, &rq->active_list) {
|
|
tmp = list_entry(p, struct spu, sched_list);
|
|
if (tmp == spu) {
|
|
del_active(rq, spu);
|
|
rc = 1;
|
|
break;
|
|
}
|
|
}
|
|
up(&rq->sem);
|
|
return rc;
|
|
}
|
|
|
|
static void put_active_spu(struct spu *spu)
|
|
{
|
|
struct spu_runqueue *rq = spu->rq;
|
|
|
|
down(&rq->sem);
|
|
add_active(rq, spu);
|
|
up(&rq->sem);
|
|
}
|
|
|
|
/* Lock order:
|
|
* spu_activate() & spu_deactivate() require the
|
|
* caller to have down_write(&ctx->state_sema).
|
|
*
|
|
* The rq->sem is breifly held (inside or outside a
|
|
* given ctx lock) for list management, but is never
|
|
* held during save/restore.
|
|
*/
|
|
|
|
int spu_activate(struct spu_context *ctx, u64 flags)
|
|
{
|
|
struct spu *spu;
|
|
|
|
if (ctx->spu)
|
|
return 0;
|
|
spu = get_idle_spu(ctx, flags);
|
|
if (!spu)
|
|
return (signal_pending(current)) ? -ERESTARTSYS : -EAGAIN;
|
|
bind_context(spu, ctx);
|
|
put_active_spu(spu);
|
|
return 0;
|
|
}
|
|
|
|
void spu_deactivate(struct spu_context *ctx)
|
|
{
|
|
struct spu *spu;
|
|
int needs_idle;
|
|
|
|
spu = ctx->spu;
|
|
if (!spu)
|
|
return;
|
|
needs_idle = get_active_spu(spu);
|
|
unbind_context(spu, ctx);
|
|
if (needs_idle)
|
|
put_idle_spu(spu);
|
|
}
|
|
|
|
void spu_yield(struct spu_context *ctx)
|
|
{
|
|
struct spu *spu;
|
|
int need_yield = 0;
|
|
|
|
down_write(&ctx->state_sema);
|
|
spu = ctx->spu;
|
|
if (spu && (sched_find_first_bit(spu->rq->prio.bitmap) < MAX_PRIO)) {
|
|
pr_debug("%s: yielding SPU %d\n", __FUNCTION__, spu->number);
|
|
spu_deactivate(ctx);
|
|
ctx->state = SPU_STATE_SAVED;
|
|
need_yield = 1;
|
|
} else if (spu) {
|
|
spu->prio = MAX_PRIO;
|
|
}
|
|
up_write(&ctx->state_sema);
|
|
if (unlikely(need_yield))
|
|
yield();
|
|
}
|
|
|
|
int __init spu_sched_init(void)
|
|
{
|
|
struct spu_runqueue *rq;
|
|
struct spu *spu;
|
|
int i;
|
|
|
|
rq = spu_runqueues = kmalloc(sizeof(struct spu_runqueue), GFP_KERNEL);
|
|
if (!rq) {
|
|
printk(KERN_WARNING "%s: Unable to allocate runqueues.\n",
|
|
__FUNCTION__);
|
|
return 1;
|
|
}
|
|
memset(rq, 0, sizeof(struct spu_runqueue));
|
|
init_MUTEX(&rq->sem);
|
|
INIT_LIST_HEAD(&rq->active_list);
|
|
INIT_LIST_HEAD(&rq->idle_list);
|
|
rq->nr_active = 0;
|
|
rq->nr_idle = 0;
|
|
rq->nr_switches = 0;
|
|
atomic_set(&rq->prio.nr_blocked, 0);
|
|
for (i = 0; i < MAX_PRIO; i++) {
|
|
init_waitqueue_head(&rq->prio.waitq[i]);
|
|
__clear_bit(i, rq->prio.bitmap);
|
|
}
|
|
__set_bit(MAX_PRIO, rq->prio.bitmap);
|
|
for (;;) {
|
|
spu = spu_alloc();
|
|
if (!spu)
|
|
break;
|
|
pr_debug("%s: adding SPU[%d]\n", __FUNCTION__, spu->number);
|
|
add_idle(rq, spu);
|
|
spu->rq = rq;
|
|
spu->timestamp = jiffies;
|
|
}
|
|
if (!rq->nr_idle) {
|
|
printk(KERN_WARNING "%s: No available SPUs.\n", __FUNCTION__);
|
|
kfree(rq);
|
|
return 1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
void __exit spu_sched_exit(void)
|
|
{
|
|
struct spu_runqueue *rq = spu_rq();
|
|
struct spu *spu;
|
|
|
|
if (!rq) {
|
|
printk(KERN_WARNING "%s: no runqueues!\n", __FUNCTION__);
|
|
return;
|
|
}
|
|
while (rq->nr_idle > 0) {
|
|
spu = del_idle(rq);
|
|
if (!spu)
|
|
break;
|
|
spu_free(spu);
|
|
}
|
|
kfree(rq);
|
|
}
|