linux_dsm_epyc7002/arch/powerpc/platforms/powernv/opal-irqchip.c
Nicholas Piggin c0beffc4f4 powerpc/powernv: Fix opal_event_shutdown() called with interrupts disabled
A kernel crash in process context that calls emergency_restart from
panic will end up calling opal_event_shutdown with interrupts disabled
but not in interrupt. This causes a sleeping function to be called
which gives the following warning with sysrq+c:

    Rebooting in 10 seconds..
    BUG: sleeping function called from invalid context at kernel/locking/mutex.c:238
    in_atomic(): 0, irqs_disabled(): 1, pid: 7669, name: bash
    CPU: 20 PID: 7669 Comm: bash Tainted: G      D W         4.17.0-rc5+ #3
    Call Trace:
    dump_stack+0xb0/0xf4 (unreliable)
    ___might_sleep+0x174/0x1a0
    mutex_lock+0x38/0xb0
    __free_irq+0x68/0x460
    free_irq+0x70/0xc0
    opal_event_shutdown+0xb4/0xf0
    opal_shutdown+0x24/0xa0
    pnv_shutdown+0x28/0x40
    machine_shutdown+0x44/0x60
    machine_restart+0x28/0x80
    emergency_restart+0x30/0x50
    panic+0x2a0/0x328
    oops_end+0x1ec/0x1f0
    bad_page_fault+0xe8/0x154
    handle_page_fault+0x34/0x38
    --- interrupt: 300 at sysrq_handle_crash+0x44/0x60
    LR = __handle_sysrq+0xfc/0x260
    flag_spec.62335+0x12b844/0x1e8db4 (unreliable)
    __handle_sysrq+0xfc/0x260
    write_sysrq_trigger+0xa8/0xb0
    proc_reg_write+0xac/0x110
    __vfs_write+0x6c/0x240
    vfs_write+0xd0/0x240
    ksys_write+0x6c/0x110

Fixes: 9f0fd0499d ("powerpc/powernv: Add a virtual irqchip for opal events")
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2018-05-18 00:21:05 +10:00

303 lines
7.8 KiB
C

/*
* This file implements an irqchip for OPAL events. Whenever there is
* an interrupt that is handled by OPAL we get passed a list of events
* that Linux needs to do something about. These basically look like
* interrupts to Linux so we implement an irqchip to handle them.
*
* Copyright Alistair Popple, IBM Corporation 2014.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*/
#include <linux/bitops.h>
#include <linux/irq.h>
#include <linux/irqchip.h>
#include <linux/irqdomain.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/kthread.h>
#include <linux/delay.h>
#include <linux/slab.h>
#include <linux/irq_work.h>
#include <asm/machdep.h>
#include <asm/opal.h>
#include "powernv.h"
/*
 * Maximum number of events supported by OPAL firmware. This is also the
 * size passed to irq_domain_add_linear() when the event domain is created
 * in opal_event_init().
 */
#define MAX_NUM_EVENTS 64
/*
 * State of the virtual irqchip that presents OPAL events to Linux as
 * interrupts.
 */
struct opal_event_irqchip {
/* irq_chip callbacks installed on every mapped OPAL event */
struct irq_chip irqchip;
/* irq domain used to look up the Linux virq for an OPAL event number */
struct irq_domain *domain;
/* Bitmask of unmasked events: bit N is set while hwirq N is unmasked */
unsigned long mask;
};
/* The single irqchip instance; its initializer appears further down. */
static struct opal_event_irqchip opal_event_irqchip;
/* Number of entries in opal_irqs, read from the "opal-interrupts" property */
static unsigned int opal_irq_count;
/* Linux virqs requested in opal_event_init(); an entry of 0 means unused */
static unsigned int *opal_irqs;
static void opal_handle_irq_work(struct irq_work *work);
/* Event bitmask saved for deferred processing by opal_event_irq_work */
static u64 last_outstanding_events;
/* irq_work whose callback replays last_outstanding_events */
static struct irq_work opal_event_irq_work = {
.func = opal_handle_irq_work,
};
/*
 * opal_handle_events() - Dispatch a bitmask of OPAL events.
 * @events: bitmask of events reported by OPAL, one bit per event number.
 *
 * Only events that are currently unmasked are considered. When in_irq() is
 * false the events are stashed in last_outstanding_events and an irq_work
 * is queued to handle them later. Otherwise the handler of every unmasked
 * pending event that has a mapping is run, highest event number first.
 */
void opal_handle_events(uint64_t events)
{
	/* Snapshot the mask once so it cannot change under the loop below */
	u64 enabled = opal_event_irqchip.mask;
	u64 pending = events & enabled;

	if (!pending)
		return;

	if (!in_irq()) {
		last_outstanding_events = events;
		irq_work_queue(&opal_event_irq_work);
		return;
	}

	do {
		int hwirq = fls64(pending) - 1;
		int virq = irq_find_mapping(opal_event_irqchip.domain, hwirq);

		if (virq)
			generic_handle_irq(virq);

		pending &= ~BIT_ULL(hwirq);
	} while (pending);
}
/* irq_chip .irq_mask callback: clear this event's bit in the enable mask. */
static void opal_event_mask(struct irq_data *d)
{
	unsigned long *enabled = &opal_event_irqchip.mask;

	clear_bit(d->hwirq, enabled);
}
/*
 * irq_chip .irq_unmask callback: set this event's bit in the enable mask,
 * poll OPAL for the current events and, if any unmasked event is pending,
 * queue the irq_work so it gets handled.
 */
static void opal_event_unmask(struct irq_data *d)
{
	__be64 polled;

	set_bit(d->hwirq, &opal_event_irqchip.mask);

	opal_poll_events(&polled);
	last_outstanding_events = be64_to_cpu(polled);

	if (!(last_outstanding_events & opal_event_irqchip.mask))
		return;

	/*
	 * The interrupt needs to be retriggered, but the events cannot be
	 * handled right here with opal_handle_events(): when this function
	 * is called from handle_level_irq() the irq descriptor lock is held,
	 * and opal_handle_events() would call generic_handle_irq() and then
	 * handle_level_irq(), which would try to take the descriptor lock
	 * again and deadlock. Queue the events for later instead.
	 */
	irq_work_queue(&opal_event_irq_work);
}
/*
 * irq_chip .irq_set_type callback.
 *
 * Only level triggered events are supported for now: the irq handler is
 * called continuously until the event has been cleared in OPAL.
 *
 * Returns 0 for IRQ_TYPE_LEVEL_HIGH and -EINVAL for any other flow type.
 */
static int opal_event_set_type(struct irq_data *d, unsigned int flow_type)
{
	return flow_type == IRQ_TYPE_LEVEL_HIGH ? 0 : -EINVAL;
}
/*
 * The OPAL event irqchip instance. All events start out masked (mask is 0);
 * the domain pointer is filled in by opal_event_init().
 */
static struct opal_event_irqchip opal_event_irqchip = {
.irqchip = {
.name = "OPAL EVT",
.irq_mask = opal_event_mask,
.irq_unmask = opal_event_unmask,
.irq_set_type = opal_event_set_type,
},
.mask = 0,
};
/*
 * irq_domain .map callback: attach the OPAL event irqchip and the level
 * flow handler to a newly mapped virq. Always returns 0.
 */
static int opal_event_map(struct irq_domain *d, unsigned int irq,
			  irq_hw_number_t hwirq)
{
	struct opal_event_irqchip *chip = &opal_event_irqchip;

	irq_set_chip_data(irq, chip);
	irq_set_chip_and_handler(irq, &chip->irqchip, handle_level_irq);

	return 0;
}
static irqreturn_t opal_interrupt(int irq, void *data)
{
__be64 events;
opal_handle_interrupt(virq_to_hw(irq), &events);
opal_handle_events(be64_to_cpu(events));
return IRQ_HANDLED;
}
static void opal_handle_irq_work(struct irq_work *work)
{
opal_handle_events(last_outstanding_events);
}
/*
 * irq_domain .match callback: the domain matches when its device tree node
 * is @node. The bus token is not taken into account.
 */
static int opal_event_match(struct irq_domain *h, struct device_node *node,
			    enum irq_domain_bus_token bus_token)
{
	struct device_node *domain_node = irq_domain_get_of_node(h);

	return domain_node == node;
}
/*
 * irq_domain .xlate callback: translate a device tree interrupt specifier
 * into an OPAL event number and trigger type.
 *
 * @intspec:   interrupt specifier cells; cell 0 is the event number.
 * @intsize:   number of cells in @intspec.
 * @out_hwirq: set to the event number.
 * @out_flags: always set to IRQ_TYPE_LEVEL_HIGH, the only supported type.
 *
 * Returns 0 on success, or -EINVAL if the specifier has no cells.
 */
static int opal_event_xlate(struct irq_domain *h, struct device_node *np,
			    const u32 *intspec, unsigned int intsize,
			    irq_hw_number_t *out_hwirq, unsigned int *out_flags)
{
	/* Never read intspec[0] from an empty specifier */
	if (WARN_ON(intsize < 1))
		return -EINVAL;

	*out_hwirq = intspec[0];
	*out_flags = IRQ_TYPE_LEVEL_HIGH;

	return 0;
}
/* irq_domain callbacks for the OPAL event domain created in opal_event_init() */
static const struct irq_domain_ops opal_event_domain_ops = {
.match = opal_event_match,
.map = opal_event_map,
.xlate = opal_event_xlate,
};
/*
 * opal_event_shutdown() - Release the OPAL interrupts requested by
 * opal_event_init().
 *
 * Each cached virq is either freed or, when sleeping is not allowed,
 * merely disabled; its cache slot is then cleared so a second call does
 * nothing for that entry.
 */
void opal_event_shutdown(void)
{
	unsigned int i;

	/*
	 * opal_irq_count is set before opal_irqs is allocated, so a failed
	 * allocation leaves a non-zero count with no array behind it.
	 */
	if (!opal_irqs)
		return;

	/* First free interrupts, which will also mask them */
	for (i = 0; i < opal_irq_count; i++) {
		if (!opal_irqs[i])
			continue;

		/*
		 * free_irq() takes a mutex and may sleep, which is not
		 * allowed in interrupt context or with interrupts disabled
		 * (e.g. emergency_restart() from panic()). Only disable the
		 * interrupt in that case.
		 */
		if (in_interrupt() || irqs_disabled())
			disable_irq_nosync(opal_irqs[i]);
		else
			free_irq(opal_irqs[i], NULL);

		opal_irqs[i] = 0;
	}
}
/*
 * opal_event_init() - Create the OPAL event irq domain and install handlers
 * for the interrupts listed in the "opal-interrupts" property.
 *
 * A failure to map or request an individual interrupt is logged and that
 * interrupt is skipped; it does not fail the whole initialisation.
 *
 * Returns 0 on success, -ENODEV if the /ibm,opal node is missing, -ENOMEM
 * if the domain or the bookkeeping arrays cannot be allocated, or the
 * negative error returned while reading "opal-interrupts".
 */
int __init opal_event_init(void)
{
	struct device_node *dn, *opal_node;
	const char **names;
	u32 *irqs;
	unsigned int i;
	int rc;

	opal_node = of_find_node_by_path("/ibm,opal");
	if (!opal_node) {
		pr_warn("opal: Node not found\n");
		return -ENODEV;
	}

	/*
	 * If dn is NULL it means the domain won't be linked to a DT
	 * node so therefore irq_of_parse_and_map(...) won't work. But
	 * that shouldn't be a problem because if we're running a
	 * version of skiboot that doesn't have the dn then the
	 * devices won't have the correct properties and will have to
	 * fall back to the legacy method (opal_event_request(...))
	 * anyway.
	 */
	dn = of_find_compatible_node(NULL, NULL, "ibm,opal-event");
	opal_event_irqchip.domain = irq_domain_add_linear(dn, MAX_NUM_EVENTS,
				&opal_event_domain_ops, &opal_event_irqchip);
	of_node_put(dn);
	if (!opal_event_irqchip.domain) {
		pr_warn("opal: Unable to create irq domain\n");
		rc = -ENOMEM;
		goto out;
	}

	/* Get opal-interrupts property and names if present */
	rc = of_property_count_u32_elems(opal_node, "opal-interrupts");
	if (rc < 0)
		goto out;

	opal_irq_count = rc;
	pr_debug("Found %d interrupts reserved for OPAL\n", opal_irq_count);

	irqs = kcalloc(opal_irq_count, sizeof(*irqs), GFP_KERNEL);
	names = kcalloc(opal_irq_count, sizeof(*names), GFP_KERNEL);
	opal_irqs = kcalloc(opal_irq_count, sizeof(*opal_irqs), GFP_KERNEL);
	if (WARN_ON(!irqs || !names || !opal_irqs)) {
		/* rc still holds the interrupt count here; report the failure */
		rc = -ENOMEM;
		goto out_free;
	}

	rc = of_property_read_u32_array(opal_node, "opal-interrupts",
					irqs, opal_irq_count);
	if (rc < 0) {
		pr_err("Error %d reading opal-interrupts array\n", rc);
		goto out_free;
	}

	/* It's not an error for the names to be missing */
	of_property_read_string_array(opal_node, "opal-interrupts-names",
				      names, opal_irq_count);

	/* Install interrupt handlers */
	for (i = 0; i < opal_irq_count; i++) {
		unsigned int virq;
		char *name;

		/* Get hardware and virtual IRQ */
		virq = irq_create_mapping(NULL, irqs[i]);
		if (!virq) {
			pr_warn("Failed to map irq 0x%x\n", irqs[i]);
			continue;
		}

		if (names[i] && strlen(names[i]))
			name = kasprintf(GFP_KERNEL, "opal-%s", names[i]);
		else
			name = kasprintf(GFP_KERNEL, "opal");
		if (!name) {
			irq_dispose_mapping(virq);
			pr_warn("Failed to allocate name for irq 0x%x\n",
				irqs[i]);
			continue;
		}

		/* Install interrupt handler */
		rc = request_irq(virq, opal_interrupt, IRQF_TRIGGER_LOW,
				 name, NULL);
		if (rc) {
			irq_dispose_mapping(virq);
			pr_warn("Error %d requesting irq %d (0x%x)\n",
				rc, virq, irqs[i]);
			/* The name is only kept alive by a registered handler */
			kfree(name);
			continue;
		}

		/* Cache IRQ */
		opal_irqs[i] = virq;
	}

	/* Per-interrupt failures were already reported and are not fatal */
	rc = 0;

out_free:
	if (rc) {
		/* Keep opal_event_shutdown() from walking a missing array */
		kfree(opal_irqs);
		opal_irqs = NULL;
		opal_irq_count = 0;
	}
	kfree(irqs);
	kfree(names);
out:
	of_node_put(opal_node);
	return rc;
}
machine_arch_initcall(powernv, opal_event_init);
/**
 * opal_event_request() - Look up the Linux virq for an OPAL event
 * @opal_event_nr: the opal event number to request
 *
 * The returned virq can be passed to request_irq() to assign a handler
 * for a particular opal event. This is meant for legacy devices that lack
 * proper device tree bindings; most devices should use
 * irq_of_parse_and_map() instead.
 *
 * Return: the virq from irq_create_mapping(), or 0 (with a one-time
 * warning) if the OPAL event domain has not been created.
 */
int opal_event_request(unsigned int opal_event_nr)
{
	struct irq_domain *domain = opal_event_irqchip.domain;

	if (WARN_ON_ONCE(!domain))
		return 0;

	return irq_create_mapping(domain, opal_event_nr);
}
EXPORT_SYMBOL(opal_event_request);