linux_dsm_epyc7002/drivers/irqchip/irq-partition-percpu.c

246 lines
6.3 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-only
irqchip: Add per-cpu interrupt partitioning library We've unfortunately started seeing a situation where percpu interrupts are partitioned in the system: one arbitrary set of CPUs has an interrupt connected to a type of device, while another disjoint set of CPUs has the same interrupt connected to another type of device. This makes it impossible to have a device driver requesting this interrupt using the current percpu-interrupt abstraction, as the same interrupt number is now potentially claimed by at least two drivers, and we forbid interrupt sharing on per-cpu interrupt. A solution to this is to turn things upside down. Let's assume that our system describes all the possible partitions for a given interrupt, and give each of them a unique identifier. It is then possible to create a namespace where the affinity identifier itself is a form of interrupt number. At this point, it becomes easy to implement a set of partitions as a cascaded irqchip, each affinity identifier being the HW irq. This allows us to keep a number of nice properties: - Each partition results in a separate percpu-interrupt (with a restrictied affinity), which keeps drivers happy. - Because the underlying interrupt is still per-cpu, the overhead of the indirection can be kept pretty minimal. - The core code can ignore most of that crap. For that purpose, we implement a small library that deals with some of the boilerplate code, relying on platform-specific drivers to provide a description of the affinity sets and a set of callbacks. Signed-off-by: Marc Zyngier <marc.zyngier@arm.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: devicetree@vger.kernel.org Cc: Jason Cooper <jason@lakedaemon.net> Cc: Will Deacon <will.deacon@arm.com> Cc: Rob Herring <robh+dt@kernel.org> Link: http://lkml.kernel.org/r/1460365075-7316-4-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2016-04-11 15:57:53 +07:00
/*
* Copyright (C) 2016 ARM Limited, All Rights Reserved.
* Author: Marc Zyngier <marc.zyngier@arm.com>
*/
#include <linux/bitops.h>
#include <linux/interrupt.h>
#include <linux/irqchip.h>
#include <linux/irqchip/chained_irq.h>
#include <linux/irqchip/irq-partition-percpu.h>
#include <linux/irqdomain.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
struct partition_desc {
int nr_parts;
struct partition_affinity *parts;
struct irq_domain *domain;
struct irq_desc *chained_desc;
unsigned long *bitmap;
struct irq_domain_ops ops;
};
static bool partition_check_cpu(struct partition_desc *part,
unsigned int cpu, unsigned int hwirq)
{
return cpumask_test_cpu(cpu, &part->parts[hwirq].mask);
}
static void partition_irq_mask(struct irq_data *d)
{
struct partition_desc *part = irq_data_get_irq_chip_data(d);
struct irq_chip *chip = irq_desc_get_chip(part->chained_desc);
struct irq_data *data = irq_desc_get_irq_data(part->chained_desc);
if (partition_check_cpu(part, smp_processor_id(), d->hwirq) &&
chip->irq_mask)
chip->irq_mask(data);
}
static void partition_irq_unmask(struct irq_data *d)
{
struct partition_desc *part = irq_data_get_irq_chip_data(d);
struct irq_chip *chip = irq_desc_get_chip(part->chained_desc);
struct irq_data *data = irq_desc_get_irq_data(part->chained_desc);
if (partition_check_cpu(part, smp_processor_id(), d->hwirq) &&
chip->irq_unmask)
chip->irq_unmask(data);
}
static int partition_irq_set_irqchip_state(struct irq_data *d,
enum irqchip_irq_state which,
bool val)
{
struct partition_desc *part = irq_data_get_irq_chip_data(d);
struct irq_chip *chip = irq_desc_get_chip(part->chained_desc);
struct irq_data *data = irq_desc_get_irq_data(part->chained_desc);
if (partition_check_cpu(part, smp_processor_id(), d->hwirq) &&
chip->irq_set_irqchip_state)
return chip->irq_set_irqchip_state(data, which, val);
return -EINVAL;
}
static int partition_irq_get_irqchip_state(struct irq_data *d,
enum irqchip_irq_state which,
bool *val)
{
struct partition_desc *part = irq_data_get_irq_chip_data(d);
struct irq_chip *chip = irq_desc_get_chip(part->chained_desc);
struct irq_data *data = irq_desc_get_irq_data(part->chained_desc);
if (partition_check_cpu(part, smp_processor_id(), d->hwirq) &&
chip->irq_get_irqchip_state)
return chip->irq_get_irqchip_state(data, which, val);
return -EINVAL;
}
static int partition_irq_set_type(struct irq_data *d, unsigned int type)
{
struct partition_desc *part = irq_data_get_irq_chip_data(d);
struct irq_chip *chip = irq_desc_get_chip(part->chained_desc);
struct irq_data *data = irq_desc_get_irq_data(part->chained_desc);
if (chip->irq_set_type)
return chip->irq_set_type(data, type);
return -EINVAL;
}
static void partition_irq_print_chip(struct irq_data *d, struct seq_file *p)
{
struct partition_desc *part = irq_data_get_irq_chip_data(d);
struct irq_chip *chip = irq_desc_get_chip(part->chained_desc);
struct irq_data *data = irq_desc_get_irq_data(part->chained_desc);
seq_printf(p, " %5s-%lu", chip->name, data->hwirq);
}
static struct irq_chip partition_irq_chip = {
.irq_mask = partition_irq_mask,
.irq_unmask = partition_irq_unmask,
.irq_set_type = partition_irq_set_type,
.irq_get_irqchip_state = partition_irq_get_irqchip_state,
.irq_set_irqchip_state = partition_irq_set_irqchip_state,
.irq_print_chip = partition_irq_print_chip,
};
static void partition_handle_irq(struct irq_desc *desc)
{
struct partition_desc *part = irq_desc_get_handler_data(desc);
struct irq_chip *chip = irq_desc_get_chip(desc);
int cpu = smp_processor_id();
int hwirq;
chained_irq_enter(chip, desc);
for_each_set_bit(hwirq, part->bitmap, part->nr_parts) {
if (partition_check_cpu(part, cpu, hwirq))
break;
}
if (unlikely(hwirq == part->nr_parts)) {
handle_bad_irq(desc);
} else {
unsigned int irq;
irq = irq_find_mapping(part->domain, hwirq);
generic_handle_irq(irq);
}
chained_irq_exit(chip, desc);
}
static int partition_domain_alloc(struct irq_domain *domain, unsigned int virq,
unsigned int nr_irqs, void *arg)
{
int ret;
irq_hw_number_t hwirq;
unsigned int type;
struct irq_fwspec *fwspec = arg;
struct partition_desc *part;
BUG_ON(nr_irqs != 1);
ret = domain->ops->translate(domain, fwspec, &hwirq, &type);
if (ret)
return ret;
part = domain->host_data;
set_bit(hwirq, part->bitmap);
irq_set_chained_handler_and_data(irq_desc_get_irq(part->chained_desc),
partition_handle_irq, part);
irq_set_percpu_devid_partition(virq, &part->parts[hwirq].mask);
irq_domain_set_info(domain, virq, hwirq, &partition_irq_chip, part,
handle_percpu_devid_irq, NULL, NULL);
irq_set_status_flags(virq, IRQ_NOAUTOEN);
return 0;
}
static void partition_domain_free(struct irq_domain *domain, unsigned int virq,
unsigned int nr_irqs)
{
struct irq_data *d;
BUG_ON(nr_irqs != 1);
d = irq_domain_get_irq_data(domain, virq);
irq_set_handler(virq, NULL);
irq_domain_reset_irq_data(d);
}
int partition_translate_id(struct partition_desc *desc, void *partition_id)
{
struct partition_affinity *part = NULL;
int i;
for (i = 0; i < desc->nr_parts; i++) {
if (desc->parts[i].partition_id == partition_id) {
part = &desc->parts[i];
break;
}
}
if (WARN_ON(!part)) {
pr_err("Failed to find partition\n");
return -EINVAL;
}
return i;
}
struct partition_desc *partition_create_desc(struct fwnode_handle *fwnode,
struct partition_affinity *parts,
int nr_parts,
int chained_irq,
const struct irq_domain_ops *ops)
{
struct partition_desc *desc;
struct irq_domain *d;
BUG_ON(!ops->select || !ops->translate);
desc = kzalloc(sizeof(*desc), GFP_KERNEL);
if (!desc)
return NULL;
desc->ops = *ops;
desc->ops.free = partition_domain_free;
desc->ops.alloc = partition_domain_alloc;
d = irq_domain_create_linear(fwnode, nr_parts, &desc->ops, desc);
if (!d)
goto out;
desc->domain = d;
treewide: kzalloc() -> kcalloc() The kzalloc() function has a 2-factor argument form, kcalloc(). This patch replaces cases of: kzalloc(a * b, gfp) with: kcalloc(a * b, gfp) as well as handling cases of: kzalloc(a * b * c, gfp) with: kzalloc(array3_size(a, b, c), gfp) as it's slightly less ugly than: kzalloc_array(array_size(a, b), c, gfp) This does, however, attempt to ignore constant size factors like: kzalloc(4 * 1024, gfp) though any constants defined via macros get caught up in the conversion. Any factors with a sizeof() of "unsigned char", "char", and "u8" were dropped, since they're redundant. The Coccinelle script used for this was: // Fix redundant parens around sizeof(). @@ type TYPE; expression THING, E; @@ ( kzalloc( - (sizeof(TYPE)) * E + sizeof(TYPE) * E , ...) | kzalloc( - (sizeof(THING)) * E + sizeof(THING) * E , ...) ) // Drop single-byte sizes and redundant parens. @@ expression COUNT; typedef u8; typedef __u8; @@ ( kzalloc( - sizeof(u8) * (COUNT) + COUNT , ...) | kzalloc( - sizeof(__u8) * (COUNT) + COUNT , ...) | kzalloc( - sizeof(char) * (COUNT) + COUNT , ...) | kzalloc( - sizeof(unsigned char) * (COUNT) + COUNT , ...) | kzalloc( - sizeof(u8) * COUNT + COUNT , ...) | kzalloc( - sizeof(__u8) * COUNT + COUNT , ...) | kzalloc( - sizeof(char) * COUNT + COUNT , ...) | kzalloc( - sizeof(unsigned char) * COUNT + COUNT , ...) ) // 2-factor product with sizeof(type/expression) and identifier or constant. @@ type TYPE; expression THING; identifier COUNT_ID; constant COUNT_CONST; @@ ( - kzalloc + kcalloc ( - sizeof(TYPE) * (COUNT_ID) + COUNT_ID, sizeof(TYPE) , ...) | - kzalloc + kcalloc ( - sizeof(TYPE) * COUNT_ID + COUNT_ID, sizeof(TYPE) , ...) | - kzalloc + kcalloc ( - sizeof(TYPE) * (COUNT_CONST) + COUNT_CONST, sizeof(TYPE) , ...) | - kzalloc + kcalloc ( - sizeof(TYPE) * COUNT_CONST + COUNT_CONST, sizeof(TYPE) , ...) | - kzalloc + kcalloc ( - sizeof(THING) * (COUNT_ID) + COUNT_ID, sizeof(THING) , ...) | - kzalloc + kcalloc ( - sizeof(THING) * COUNT_ID + COUNT_ID, sizeof(THING) , ...) | - kzalloc + kcalloc ( - sizeof(THING) * (COUNT_CONST) + COUNT_CONST, sizeof(THING) , ...) | - kzalloc + kcalloc ( - sizeof(THING) * COUNT_CONST + COUNT_CONST, sizeof(THING) , ...) ) // 2-factor product, only identifiers. @@ identifier SIZE, COUNT; @@ - kzalloc + kcalloc ( - SIZE * COUNT + COUNT, SIZE , ...) // 3-factor product with 1 sizeof(type) or sizeof(expression), with // redundant parens removed. @@ expression THING; identifier STRIDE, COUNT; type TYPE; @@ ( kzalloc( - sizeof(TYPE) * (COUNT) * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | kzalloc( - sizeof(TYPE) * (COUNT) * STRIDE + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | kzalloc( - sizeof(TYPE) * COUNT * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | kzalloc( - sizeof(TYPE) * COUNT * STRIDE + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | kzalloc( - sizeof(THING) * (COUNT) * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) | kzalloc( - sizeof(THING) * (COUNT) * STRIDE + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) | kzalloc( - sizeof(THING) * COUNT * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) | kzalloc( - sizeof(THING) * COUNT * STRIDE + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) ) // 3-factor product with 2 sizeof(variable), with redundant parens removed. @@ expression THING1, THING2; identifier COUNT; type TYPE1, TYPE2; @@ ( kzalloc( - sizeof(TYPE1) * sizeof(TYPE2) * COUNT + array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2)) , ...) | kzalloc( - sizeof(TYPE1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2)) , ...) | kzalloc( - sizeof(THING1) * sizeof(THING2) * COUNT + array3_size(COUNT, sizeof(THING1), sizeof(THING2)) , ...) | kzalloc( - sizeof(THING1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(THING1), sizeof(THING2)) , ...) | kzalloc( - sizeof(TYPE1) * sizeof(THING2) * COUNT + array3_size(COUNT, sizeof(TYPE1), sizeof(THING2)) , ...) | kzalloc( - sizeof(TYPE1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(TYPE1), sizeof(THING2)) , ...) ) // 3-factor product, only identifiers, with redundant parens removed. @@ identifier STRIDE, SIZE, COUNT; @@ ( kzalloc( - (COUNT) * STRIDE * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) | kzalloc( - COUNT * (STRIDE) * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) | kzalloc( - COUNT * STRIDE * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | kzalloc( - (COUNT) * (STRIDE) * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) | kzalloc( - COUNT * (STRIDE) * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | kzalloc( - (COUNT) * STRIDE * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | kzalloc( - (COUNT) * (STRIDE) * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | kzalloc( - COUNT * STRIDE * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) ) // Any remaining multi-factor products, first at least 3-factor products, // when they're not all constants... @@ expression E1, E2, E3; constant C1, C2, C3; @@ ( kzalloc(C1 * C2 * C3, ...) | kzalloc( - (E1) * E2 * E3 + array3_size(E1, E2, E3) , ...) | kzalloc( - (E1) * (E2) * E3 + array3_size(E1, E2, E3) , ...) | kzalloc( - (E1) * (E2) * (E3) + array3_size(E1, E2, E3) , ...) | kzalloc( - E1 * E2 * E3 + array3_size(E1, E2, E3) , ...) ) // And then all remaining 2 factors products when they're not all constants, // keeping sizeof() as the second factor argument. @@ expression THING, E1, E2; type TYPE; constant C1, C2, C3; @@ ( kzalloc(sizeof(THING) * C2, ...) | kzalloc(sizeof(TYPE) * C2, ...) | kzalloc(C1 * C2 * C3, ...) | kzalloc(C1 * C2, ...) | - kzalloc + kcalloc ( - sizeof(TYPE) * (E2) + E2, sizeof(TYPE) , ...) | - kzalloc + kcalloc ( - sizeof(TYPE) * E2 + E2, sizeof(TYPE) , ...) | - kzalloc + kcalloc ( - sizeof(THING) * (E2) + E2, sizeof(THING) , ...) | - kzalloc + kcalloc ( - sizeof(THING) * E2 + E2, sizeof(THING) , ...) | - kzalloc + kcalloc ( - (E1) * E2 + E1, E2 , ...) | - kzalloc + kcalloc ( - (E1) * (E2) + E1, E2 , ...) | - kzalloc + kcalloc ( - E1 * E2 + E1, E2 , ...) ) Signed-off-by: Kees Cook <keescook@chromium.org>
2018-06-13 04:03:40 +07:00
desc->bitmap = kcalloc(BITS_TO_LONGS(nr_parts), sizeof(long),
irqchip: Add per-cpu interrupt partitioning library We've unfortunately started seeing a situation where percpu interrupts are partitioned in the system: one arbitrary set of CPUs has an interrupt connected to a type of device, while another disjoint set of CPUs has the same interrupt connected to another type of device. This makes it impossible to have a device driver requesting this interrupt using the current percpu-interrupt abstraction, as the same interrupt number is now potentially claimed by at least two drivers, and we forbid interrupt sharing on per-cpu interrupt. A solution to this is to turn things upside down. Let's assume that our system describes all the possible partitions for a given interrupt, and give each of them a unique identifier. It is then possible to create a namespace where the affinity identifier itself is a form of interrupt number. At this point, it becomes easy to implement a set of partitions as a cascaded irqchip, each affinity identifier being the HW irq. This allows us to keep a number of nice properties: - Each partition results in a separate percpu-interrupt (with a restrictied affinity), which keeps drivers happy. - Because the underlying interrupt is still per-cpu, the overhead of the indirection can be kept pretty minimal. - The core code can ignore most of that crap. For that purpose, we implement a small library that deals with some of the boilerplate code, relying on platform-specific drivers to provide a description of the affinity sets and a set of callbacks. Signed-off-by: Marc Zyngier <marc.zyngier@arm.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: devicetree@vger.kernel.org Cc: Jason Cooper <jason@lakedaemon.net> Cc: Will Deacon <will.deacon@arm.com> Cc: Rob Herring <robh+dt@kernel.org> Link: http://lkml.kernel.org/r/1460365075-7316-4-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2016-04-11 15:57:53 +07:00
GFP_KERNEL);
if (WARN_ON(!desc->bitmap))
goto out;
desc->chained_desc = irq_to_desc(chained_irq);
desc->nr_parts = nr_parts;
desc->parts = parts;
return desc;
out:
if (d)
irq_domain_remove(d);
kfree(desc);
return NULL;
}
struct irq_domain *partition_get_domain(struct partition_desc *dsc)
{
if (dsc)
return dsc->domain;
return NULL;
}