linux_dsm_epyc7002/arch/x86/kernel/uv_irq.c
Suresh Siddha 18374d89e5 x86, irq: Allow 0xff for /proc/irq/[n]/smp_affinity on an 8-cpu system
John Blackwood reported:
> on an older Dell PowerEdge 6650 system with 8 cpus (4 are hyper-threaded),
> and  32 bit (x86) kernel, once you change the irq smp_affinity of an irq
> to be less than all cpus in the system, you can never change really the
> irq smp_affinity back to be all cpus in the system (0xff) again,
> even though no error status is returned on the "/bin/echo ff >
> /proc/irq/[n]/smp_affinity" operation.
>
> This is due to that fact that BAD_APICID has the same value as
> all cpus (0xff) on 32bit kernels, and thus the value returned from
> set_desc_affinity() via the cpu_mask_to_apicid_and() function is treated
> as a failure in set_ioapic_affinity_irq_desc(), and no affinity changes
> are made.

set_desc_affinity() is already checking if the incoming cpu mask
intersects with the cpu online mask or not. So there is no need
for the apic op cpu_mask_to_apicid_and() to check again
and return BAD_APICID.

Remove the BAD_APICID return value from cpu_mask_to_apicid_and()
and also fix set_desc_affinity() to return -1 instead of using BAD_APICID
to represent error conditions (as cpu_mask_to_apicid_and() can return
logical or physical apicid values and BAD_APICID is really to represent
bad physical apic id).

Reported-by: John Blackwood <john.blackwood@ccur.com>
Root-caused-by: John Blackwood <john.blackwood@ccur.com>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
LKML-Reference: <1261103386.2535.409.camel@sbs-t61>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
2009-12-17 22:03:06 -08:00

302 lines
7.1 KiB
C

/*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*
* SGI UV IRQ functions
*
* Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved.
*/
#include <linux/module.h>
#include <linux/rbtree.h>
#include <linux/irq.h>
#include <asm/apic.h>
#include <asm/uv/uv_irq.h>
#include <asm/uv/uv_hub.h>
/* MMR offset and pnode of hub sourcing interrupts for a given irq */
struct uv_irq_2_mmr_pnode{
struct rb_node list;
unsigned long offset;
int pnode;
int irq;
};
static spinlock_t uv_irq_lock;
static struct rb_root uv_irq_root;
static int uv_set_irq_affinity(unsigned int, const struct cpumask *);
static void uv_noop(unsigned int irq)
{
}
static unsigned int uv_noop_ret(unsigned int irq)
{
return 0;
}
static void uv_ack_apic(unsigned int irq)
{
ack_APIC_irq();
}
struct irq_chip uv_irq_chip = {
.name = "UV-CORE",
.startup = uv_noop_ret,
.shutdown = uv_noop,
.enable = uv_noop,
.disable = uv_noop,
.ack = uv_noop,
.mask = uv_noop,
.unmask = uv_noop,
.eoi = uv_ack_apic,
.end = uv_noop,
.set_affinity = uv_set_irq_affinity,
};
/*
* Add offset and pnode information of the hub sourcing interrupts to the
* rb tree for a specific irq.
*/
static int uv_set_irq_2_mmr_info(int irq, unsigned long offset, unsigned blade)
{
struct rb_node **link = &uv_irq_root.rb_node;
struct rb_node *parent = NULL;
struct uv_irq_2_mmr_pnode *n;
struct uv_irq_2_mmr_pnode *e;
unsigned long irqflags;
n = kmalloc_node(sizeof(struct uv_irq_2_mmr_pnode), GFP_KERNEL,
uv_blade_to_memory_nid(blade));
if (!n)
return -ENOMEM;
n->irq = irq;
n->offset = offset;
n->pnode = uv_blade_to_pnode(blade);
spin_lock_irqsave(&uv_irq_lock, irqflags);
/* Find the right place in the rbtree: */
while (*link) {
parent = *link;
e = rb_entry(parent, struct uv_irq_2_mmr_pnode, list);
if (unlikely(irq == e->irq)) {
/* irq entry exists */
e->pnode = uv_blade_to_pnode(blade);
e->offset = offset;
spin_unlock_irqrestore(&uv_irq_lock, irqflags);
kfree(n);
return 0;
}
if (irq < e->irq)
link = &(*link)->rb_left;
else
link = &(*link)->rb_right;
}
/* Insert the node into the rbtree. */
rb_link_node(&n->list, parent, link);
rb_insert_color(&n->list, &uv_irq_root);
spin_unlock_irqrestore(&uv_irq_lock, irqflags);
return 0;
}
/* Retrieve offset and pnode information from the rb tree for a specific irq */
int uv_irq_2_mmr_info(int irq, unsigned long *offset, int *pnode)
{
struct uv_irq_2_mmr_pnode *e;
struct rb_node *n;
unsigned long irqflags;
spin_lock_irqsave(&uv_irq_lock, irqflags);
n = uv_irq_root.rb_node;
while (n) {
e = rb_entry(n, struct uv_irq_2_mmr_pnode, list);
if (e->irq == irq) {
*offset = e->offset;
*pnode = e->pnode;
spin_unlock_irqrestore(&uv_irq_lock, irqflags);
return 0;
}
if (irq < e->irq)
n = n->rb_left;
else
n = n->rb_right;
}
spin_unlock_irqrestore(&uv_irq_lock, irqflags);
return -1;
}
/*
* Re-target the irq to the specified CPU and enable the specified MMR located
* on the specified blade to allow the sending of MSIs to the specified CPU.
*/
static int
arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
unsigned long mmr_offset, int restrict)
{
const struct cpumask *eligible_cpu = cpumask_of(cpu);
struct irq_desc *desc = irq_to_desc(irq);
struct irq_cfg *cfg;
int mmr_pnode;
unsigned long mmr_value;
struct uv_IO_APIC_route_entry *entry;
int err;
BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) !=
sizeof(unsigned long));
cfg = irq_cfg(irq);
err = assign_irq_vector(irq, cfg, eligible_cpu);
if (err != 0)
return err;
if (restrict == UV_AFFINITY_CPU)
desc->status |= IRQ_NO_BALANCING;
else
desc->status |= IRQ_MOVE_PCNTXT;
set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
irq_name);
mmr_value = 0;
entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
entry->vector = cfg->vector;
entry->delivery_mode = apic->irq_delivery_mode;
entry->dest_mode = apic->irq_dest_mode;
entry->polarity = 0;
entry->trigger = 0;
entry->mask = 0;
entry->dest = apic->cpu_mask_to_apicid(eligible_cpu);
mmr_pnode = uv_blade_to_pnode(mmr_blade);
uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
if (cfg->move_in_progress)
send_cleanup_vector(cfg);
return irq;
}
/*
* Disable the specified MMR located on the specified blade so that MSIs are
* longer allowed to be sent.
*/
static void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset)
{
unsigned long mmr_value;
struct uv_IO_APIC_route_entry *entry;
BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) !=
sizeof(unsigned long));
mmr_value = 0;
entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
entry->mask = 1;
uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
}
static int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask)
{
struct irq_desc *desc = irq_to_desc(irq);
struct irq_cfg *cfg = desc->chip_data;
unsigned int dest;
unsigned long mmr_value;
struct uv_IO_APIC_route_entry *entry;
unsigned long mmr_offset;
unsigned mmr_pnode;
if (set_desc_affinity(desc, mask, &dest))
return -1;
mmr_value = 0;
entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
entry->vector = cfg->vector;
entry->delivery_mode = apic->irq_delivery_mode;
entry->dest_mode = apic->irq_dest_mode;
entry->polarity = 0;
entry->trigger = 0;
entry->mask = 0;
entry->dest = dest;
/* Get previously stored MMR and pnode of hub sourcing interrupts */
if (uv_irq_2_mmr_info(irq, &mmr_offset, &mmr_pnode))
return -1;
uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
if (cfg->move_in_progress)
send_cleanup_vector(cfg);
return 0;
}
/*
* Set up a mapping of an available irq and vector, and enable the specified
* MMR that defines the MSI that is to be sent to the specified CPU when an
* interrupt is raised.
*/
int uv_setup_irq(char *irq_name, int cpu, int mmr_blade,
unsigned long mmr_offset, int restrict)
{
int irq, ret;
irq = create_irq_nr(NR_IRQS_LEGACY, uv_blade_to_memory_nid(mmr_blade));
if (irq <= 0)
return -EBUSY;
ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset,
restrict);
if (ret == irq)
uv_set_irq_2_mmr_info(irq, mmr_offset, mmr_blade);
else
destroy_irq(irq);
return ret;
}
EXPORT_SYMBOL_GPL(uv_setup_irq);
/*
* Tear down a mapping of an irq and vector, and disable the specified MMR that
* defined the MSI that was to be sent to the specified CPU when an interrupt
* was raised.
*
* Set mmr_blade and mmr_offset to what was passed in on uv_setup_irq().
*/
void uv_teardown_irq(unsigned int irq)
{
struct uv_irq_2_mmr_pnode *e;
struct rb_node *n;
unsigned long irqflags;
spin_lock_irqsave(&uv_irq_lock, irqflags);
n = uv_irq_root.rb_node;
while (n) {
e = rb_entry(n, struct uv_irq_2_mmr_pnode, list);
if (e->irq == irq) {
arch_disable_uv_irq(e->pnode, e->offset);
rb_erase(n, &uv_irq_root);
kfree(e);
break;
}
if (irq < e->irq)
n = n->rb_left;
else
n = n->rb_right;
}
spin_unlock_irqrestore(&uv_irq_lock, irqflags);
destroy_irq(irq);
}
EXPORT_SYMBOL_GPL(uv_teardown_irq);