linux_dsm_epyc7002/arch/i386/kernel/irq.c
David Howells 7d12e780e0 IRQ: Maintain regs pointer globally rather than passing to IRQ handlers
Maintain a per-CPU global "struct pt_regs *" variable which can be used instead
of passing regs around manually through all ~1800 interrupt handlers in the
Linux kernel.

The regs pointer is used in few places, but it potentially costs both stack
space and code to pass it around.  On the FRV arch, removing the regs parameter
from all the genirq function results in a 20% speed up of the IRQ exit path
(ie: from leaving timer_interrupt() to leaving do_IRQ()).

Where appropriate, an arch may override the generic storage facility and do
something different with the variable.  On FRV, for instance, the address is
maintained in GR28 at all times inside the kernel as part of general exception
handling.

Having looked over the code, it appears that the parameter may be handed down
through up to twenty or so layers of functions.  Consider a USB character
device attached to a USB hub, attached to a USB controller that posts its
interrupts through a cascaded auxiliary interrupt controller.  A character
device driver may want to pass regs to the sysrq handler through the input
layer which adds another few layers of parameter passing.

I've build this code with allyesconfig for x86_64 and i386.  I've runtested the
main part of the code on FRV and i386, though I can't test most of the drivers.
I've also done partial conversion for powerpc and MIPS - these at least compile
with minimal configurations.

This will affect all archs.  Mostly the changes should be relatively easy.
Take do_IRQ(), store the regs pointer at the beginning, saving the old one:

	struct pt_regs *old_regs = set_irq_regs(regs);

And put the old one back at the end:

	set_irq_regs(old_regs);

Don't pass regs through to generic_handle_irq() or __do_IRQ().

In timer_interrupt(), this sort of change will be necessary:

	-	update_process_times(user_mode(regs));
	-	profile_tick(CPU_PROFILING, regs);
	+	update_process_times(user_mode(get_irq_regs()));
	+	profile_tick(CPU_PROFILING);

I'd like to move update_process_times()'s use of get_irq_regs() into itself,
except that i386, alone of the archs, uses something other than user_mode().

Some notes on the interrupt handling in the drivers:

 (*) input_dev() is now gone entirely.  The regs pointer is no longer stored in
     the input_dev struct.

 (*) finish_unlinks() in drivers/usb/host/ohci-q.c needs checking.  It does
     something different depending on whether it's been supplied with a regs
     pointer or not.

 (*) Various IRQ handler function pointers have been moved to type
     irq_handler_t.

Signed-Off-By: David Howells <dhowells@redhat.com>
(cherry picked from 1b16e7ac850969f38b375e511e3fa2f474a33867 commit)
2006-10-05 15:10:12 +01:00

331 lines
8.0 KiB
C

/*
* linux/arch/i386/kernel/irq.c
*
* Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
*
* This file contains the lowest level x86-specific interrupt
* entry, irq-stacks and irq statistics code. All the remaining
* irq logic is done by the generic kernel/irq/ code and
* by the x86-specific irq controller code. (e.g. i8259.c and
* io_apic.c.)
*/
#include <asm/uaccess.h>
#include <linux/module.h>
#include <linux/seq_file.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/delay.h>
DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp;
EXPORT_PER_CPU_SYMBOL(irq_stat);
#ifndef CONFIG_X86_LOCAL_APIC
/*
* 'what should we do if we get a hw irq event on an illegal vector'.
* each architecture has to answer this themselves.
*/
void ack_bad_irq(unsigned int irq)
{
printk("unexpected IRQ trap at vector %02x\n", irq);
}
#endif
#ifdef CONFIG_4KSTACKS
/*
* per-CPU IRQ handling contexts (thread information and stack)
*/
union irq_ctx {
struct thread_info tinfo;
u32 stack[THREAD_SIZE/sizeof(u32)];
};
static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly;
static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly;
#endif
/*
* do_IRQ handles all normal device IRQ's (the special
* SMP cross-CPU interrupts have their own specific
* handlers).
*/
fastcall unsigned int do_IRQ(struct pt_regs *regs)
{
struct pt_regs *old_regs;
/* high bit used in ret_from_ code */
int irq = ~regs->orig_eax;
struct irq_desc *desc = irq_desc + irq;
#ifdef CONFIG_4KSTACKS
union irq_ctx *curctx, *irqctx;
u32 *isp;
#endif
if (unlikely((unsigned)irq >= NR_IRQS)) {
printk(KERN_EMERG "%s: cannot handle IRQ %d\n",
__FUNCTION__, irq);
BUG();
}
old_regs = set_irq_regs(regs);
irq_enter();
#ifdef CONFIG_DEBUG_STACKOVERFLOW
/* Debugging check for stack overflow: is there less than 1KB free? */
{
long esp;
__asm__ __volatile__("andl %%esp,%0" :
"=r" (esp) : "0" (THREAD_SIZE - 1));
if (unlikely(esp < (sizeof(struct thread_info) + STACK_WARN))) {
printk("do_IRQ: stack overflow: %ld\n",
esp - sizeof(struct thread_info));
dump_stack();
}
}
#endif
#ifdef CONFIG_4KSTACKS
curctx = (union irq_ctx *) current_thread_info();
irqctx = hardirq_ctx[smp_processor_id()];
/*
* this is where we switch to the IRQ stack. However, if we are
* already using the IRQ stack (because we interrupted a hardirq
* handler) we can't do that and just have to keep using the
* current stack (which is the irq stack already after all)
*/
if (curctx != irqctx) {
int arg1, arg2, ebx;
/* build the stack frame on the IRQ stack */
isp = (u32*) ((char*)irqctx + sizeof(*irqctx));
irqctx->tinfo.task = curctx->tinfo.task;
irqctx->tinfo.previous_esp = current_stack_pointer;
/*
* Copy the softirq bits in preempt_count so that the
* softirq checks work in the hardirq context.
*/
irqctx->tinfo.preempt_count =
(irqctx->tinfo.preempt_count & ~SOFTIRQ_MASK) |
(curctx->tinfo.preempt_count & SOFTIRQ_MASK);
asm volatile(
" xchgl %%ebx,%%esp \n"
" call *%%edi \n"
" movl %%ebx,%%esp \n"
: "=a" (arg1), "=d" (arg2), "=b" (ebx)
: "0" (irq), "1" (desc), "2" (isp),
"D" (desc->handle_irq)
: "memory", "cc"
);
} else
#endif
desc->handle_irq(irq, desc);
irq_exit();
set_irq_regs(old_regs);
return 1;
}
#ifdef CONFIG_4KSTACKS
/*
* These should really be __section__(".bss.page_aligned") as well, but
* gcc's 3.0 and earlier don't handle that correctly.
*/
static char softirq_stack[NR_CPUS * THREAD_SIZE]
__attribute__((__aligned__(THREAD_SIZE)));
static char hardirq_stack[NR_CPUS * THREAD_SIZE]
__attribute__((__aligned__(THREAD_SIZE)));
/*
* allocate per-cpu stacks for hardirq and for softirq processing
*/
void irq_ctx_init(int cpu)
{
union irq_ctx *irqctx;
if (hardirq_ctx[cpu])
return;
irqctx = (union irq_ctx*) &hardirq_stack[cpu*THREAD_SIZE];
irqctx->tinfo.task = NULL;
irqctx->tinfo.exec_domain = NULL;
irqctx->tinfo.cpu = cpu;
irqctx->tinfo.preempt_count = HARDIRQ_OFFSET;
irqctx->tinfo.addr_limit = MAKE_MM_SEG(0);
hardirq_ctx[cpu] = irqctx;
irqctx = (union irq_ctx*) &softirq_stack[cpu*THREAD_SIZE];
irqctx->tinfo.task = NULL;
irqctx->tinfo.exec_domain = NULL;
irqctx->tinfo.cpu = cpu;
irqctx->tinfo.preempt_count = 0;
irqctx->tinfo.addr_limit = MAKE_MM_SEG(0);
softirq_ctx[cpu] = irqctx;
printk("CPU %u irqstacks, hard=%p soft=%p\n",
cpu,hardirq_ctx[cpu],softirq_ctx[cpu]);
}
void irq_ctx_exit(int cpu)
{
hardirq_ctx[cpu] = NULL;
}
extern asmlinkage void __do_softirq(void);
asmlinkage void do_softirq(void)
{
unsigned long flags;
struct thread_info *curctx;
union irq_ctx *irqctx;
u32 *isp;
if (in_interrupt())
return;
local_irq_save(flags);
if (local_softirq_pending()) {
curctx = current_thread_info();
irqctx = softirq_ctx[smp_processor_id()];
irqctx->tinfo.task = curctx->task;
irqctx->tinfo.previous_esp = current_stack_pointer;
/* build the stack frame on the softirq stack */
isp = (u32*) ((char*)irqctx + sizeof(*irqctx));
asm volatile(
" xchgl %%ebx,%%esp \n"
" call __do_softirq \n"
" movl %%ebx,%%esp \n"
: "=b"(isp)
: "0"(isp)
: "memory", "cc", "edx", "ecx", "eax"
);
/*
* Shouldnt happen, we returned above if in_interrupt():
*/
WARN_ON_ONCE(softirq_count());
}
local_irq_restore(flags);
}
EXPORT_SYMBOL(do_softirq);
#endif
/*
* Interrupt statistics:
*/
atomic_t irq_err_count;
/*
* /proc/interrupts printing:
*/
int show_interrupts(struct seq_file *p, void *v)
{
int i = *(loff_t *) v, j;
struct irqaction * action;
unsigned long flags;
if (i == 0) {
seq_printf(p, " ");
for_each_online_cpu(j)
seq_printf(p, "CPU%-8d",j);
seq_putc(p, '\n');
}
if (i < NR_IRQS) {
spin_lock_irqsave(&irq_desc[i].lock, flags);
action = irq_desc[i].action;
if (!action)
goto skip;
seq_printf(p, "%3d: ",i);
#ifndef CONFIG_SMP
seq_printf(p, "%10u ", kstat_irqs(i));
#else
for_each_online_cpu(j)
seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
#endif
seq_printf(p, " %8s", irq_desc[i].chip->name);
seq_printf(p, "-%s", handle_irq_name(irq_desc[i].handle_irq));
seq_printf(p, " %s", action->name);
for (action=action->next; action; action = action->next)
seq_printf(p, ", %s", action->name);
seq_putc(p, '\n');
skip:
spin_unlock_irqrestore(&irq_desc[i].lock, flags);
} else if (i == NR_IRQS) {
seq_printf(p, "NMI: ");
for_each_online_cpu(j)
seq_printf(p, "%10u ", nmi_count(j));
seq_putc(p, '\n');
#ifdef CONFIG_X86_LOCAL_APIC
seq_printf(p, "LOC: ");
for_each_online_cpu(j)
seq_printf(p, "%10u ",
per_cpu(irq_stat,j).apic_timer_irqs);
seq_putc(p, '\n');
#endif
seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
#if defined(CONFIG_X86_IO_APIC)
seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
#endif
}
return 0;
}
#ifdef CONFIG_HOTPLUG_CPU
#include <mach_apic.h>
void fixup_irqs(cpumask_t map)
{
unsigned int irq;
static int warned;
for (irq = 0; irq < NR_IRQS; irq++) {
cpumask_t mask;
if (irq == 2)
continue;
cpus_and(mask, irq_desc[irq].affinity, map);
if (any_online_cpu(mask) == NR_CPUS) {
printk("Breaking affinity for irq %i\n", irq);
mask = map;
}
if (irq_desc[irq].chip->set_affinity)
irq_desc[irq].chip->set_affinity(irq, mask);
else if (irq_desc[irq].action && !(warned++))
printk("Cannot set affinity for irq %i\n", irq);
}
#if 0
barrier();
/* Ingo Molnar says: "after the IO-APIC masks have been redirected
[note the nop - the interrupt-enable boundary on x86 is two
instructions from sti] - to flush out pending hardirqs and
IPIs. After this point nothing is supposed to reach this CPU." */
__asm__ __volatile__("sti; nop; cli");
barrier();
#else
/* That doesn't seem sufficient. Give it 1ms. */
local_irq_enable();
mdelay(1);
local_irq_disable();
#endif
}
#endif