2008-10-23 12:26:29 +07:00
|
|
|
#ifndef _ASM_X86_HW_IRQ_H
|
|
|
|
#define _ASM_X86_HW_IRQ_H
|
2008-05-03 00:00:30 +07:00
|
|
|
|
|
|
|
/*
|
|
|
|
* (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar
|
|
|
|
*
|
|
|
|
* moved some of the old arch/i386/kernel/irq.h to here. VY
|
|
|
|
*
|
|
|
|
* IRQ/IPI changes taken from work by Thomas Radke
|
|
|
|
* <tomsoft@informatik.tu-chemnitz.de>
|
|
|
|
*
|
|
|
|
* hacked by Andi Kleen for x86-64.
|
|
|
|
* unified by tglx
|
|
|
|
*/
|
|
|
|
|
2008-05-03 01:10:09 +07:00
|
|
|
#include <asm/irq_vectors.h>
|
2008-05-03 00:00:30 +07:00
|
|
|
|
|
|
|
#ifndef __ASSEMBLY__
|
|
|
|
|
|
|
|
#include <linux/percpu.h>
|
|
|
|
#include <linux/profile.h>
|
|
|
|
#include <linux/smp.h>
|
|
|
|
|
2011-07-27 06:09:06 +07:00
|
|
|
#include <linux/atomic.h>
|
2008-05-03 00:00:30 +07:00
|
|
|
#include <asm/irq.h>
|
|
|
|
#include <asm/sections.h>
|
|
|
|
|
|
|
|
/* Interrupt handlers registered during init_IRQ */
|
2013-08-06 05:02:37 +07:00
|
|
|
extern asmlinkage void apic_timer_interrupt(void);
|
|
|
|
extern asmlinkage void x86_platform_ipi(void);
|
|
|
|
extern asmlinkage void kvm_posted_intr_ipi(void);
|
2015-05-19 16:07:16 +07:00
|
|
|
extern asmlinkage void kvm_posted_intr_wakeup_ipi(void);
|
2013-08-06 05:02:37 +07:00
|
|
|
extern asmlinkage void error_interrupt(void);
|
|
|
|
extern asmlinkage void irq_work_interrupt(void);
|
|
|
|
|
|
|
|
extern asmlinkage void spurious_interrupt(void);
|
|
|
|
extern asmlinkage void thermal_interrupt(void);
|
|
|
|
extern asmlinkage void reschedule_interrupt(void);
|
|
|
|
|
|
|
|
extern asmlinkage void irq_move_cleanup_interrupt(void);
|
|
|
|
extern asmlinkage void reboot_interrupt(void);
|
|
|
|
extern asmlinkage void threshold_interrupt(void);
|
2015-05-06 18:58:56 +07:00
|
|
|
extern asmlinkage void deferred_error_interrupt(void);
|
2013-08-06 05:02:37 +07:00
|
|
|
|
|
|
|
extern asmlinkage void call_function_interrupt(void);
|
|
|
|
extern asmlinkage void call_function_single_interrupt(void);
|
2008-05-03 00:00:30 +07:00
|
|
|
|
x86, trace: Add irq vector tracepoints
[Purpose of this patch]
As Vaibhav explained in the thread below, tracepoints for irq vectors
are useful.
http://www.spinics.net/lists/mm-commits/msg85707.html
<snip>
The current interrupt traces from irq_handler_entry and irq_handler_exit
provide when an interrupt is handled. They provide good data about when
the system has switched to kernel space and how it affects the currently
running processes.
There are some IRQ vectors which trigger the system into kernel space,
which are not handled in generic IRQ handlers. Tracing such events gives
us the information about IRQ interaction with other system events.
The trace also tells where the system is spending its time. We want to
know which cores are handling interrupts and how they are affecting other
processes in the system. Also, the trace provides information about when
the cores are idle and which interrupts are changing that state.
<snip>
On the other hand, my usecase is tracing just local timer event and
getting a value of instruction pointer.
I suggested to add an argument local timer event to get instruction pointer before.
But there is another way to get it with external module like systemtap.
So, I don't need to add any argument to irq vector tracepoints now.
[Patch Description]
Vaibhav's patch shared a trace point ,irq_vector_entry/irq_vector_exit, in all events.
But there is an above use case to trace specific irq_vector rather than tracing all events.
In this case, we are concerned about overhead due to unwanted events.
So, add following tracepoints instead of introducing irq_vector_entry/exit.
so that we can enable them independently.
- local_timer_vector
- reschedule_vector
- call_function_vector
- call_function_single_vector
- irq_work_entry_vector
- error_apic_vector
- thermal_apic_vector
- threshold_apic_vector
- spurious_apic_vector
- x86_platform_ipi_vector
Also, introduce a logic switching IDT at enabling/disabling time so that a time penalty
makes a zero when tracepoints are disabled. Detailed explanations are as follows.
- Create trace irq handlers with entering_irq()/exiting_irq().
- Create a new IDT, trace_idt_table, at boot time by adding a logic to
_set_gate(). It is just a copy of original idt table.
- Register the new handlers for tracpoints to the new IDT by introducing
macros to alloc_intr_gate() called at registering time of irq_vector handlers.
- Add checking, whether irq vector tracing is on/off, into load_current_idt().
This has to be done below debug checking for these reasons.
- Switching to debug IDT may be kicked while tracing is enabled.
- On the other hands, switching to trace IDT is kicked only when debugging
is disabled.
In addition, the new IDT is created only when CONFIG_TRACING is enabled to avoid being
used for other purposes.
Signed-off-by: Seiji Aguchi <seiji.aguchi@hds.com>
Link: http://lkml.kernel.org/r/51C323ED.5050708@hds.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
2013-06-20 22:46:53 +07:00
|
|
|
#ifdef CONFIG_TRACING
|
|
|
|
/* Interrupt handlers registered during init_IRQ */
|
|
|
|
extern void trace_apic_timer_interrupt(void);
|
|
|
|
extern void trace_x86_platform_ipi(void);
|
|
|
|
extern void trace_error_interrupt(void);
|
|
|
|
extern void trace_irq_work_interrupt(void);
|
|
|
|
extern void trace_spurious_interrupt(void);
|
|
|
|
extern void trace_thermal_interrupt(void);
|
|
|
|
extern void trace_reschedule_interrupt(void);
|
|
|
|
extern void trace_threshold_interrupt(void);
|
2015-05-06 18:58:56 +07:00
|
|
|
extern void trace_deferred_error_interrupt(void);
|
x86, trace: Add irq vector tracepoints
[Purpose of this patch]
As Vaibhav explained in the thread below, tracepoints for irq vectors
are useful.
http://www.spinics.net/lists/mm-commits/msg85707.html
<snip>
The current interrupt traces from irq_handler_entry and irq_handler_exit
provide when an interrupt is handled. They provide good data about when
the system has switched to kernel space and how it affects the currently
running processes.
There are some IRQ vectors which trigger the system into kernel space,
which are not handled in generic IRQ handlers. Tracing such events gives
us the information about IRQ interaction with other system events.
The trace also tells where the system is spending its time. We want to
know which cores are handling interrupts and how they are affecting other
processes in the system. Also, the trace provides information about when
the cores are idle and which interrupts are changing that state.
<snip>
On the other hand, my usecase is tracing just local timer event and
getting a value of instruction pointer.
I suggested to add an argument local timer event to get instruction pointer before.
But there is another way to get it with external module like systemtap.
So, I don't need to add any argument to irq vector tracepoints now.
[Patch Description]
Vaibhav's patch shared a trace point ,irq_vector_entry/irq_vector_exit, in all events.
But there is an above use case to trace specific irq_vector rather than tracing all events.
In this case, we are concerned about overhead due to unwanted events.
So, add following tracepoints instead of introducing irq_vector_entry/exit.
so that we can enable them independently.
- local_timer_vector
- reschedule_vector
- call_function_vector
- call_function_single_vector
- irq_work_entry_vector
- error_apic_vector
- thermal_apic_vector
- threshold_apic_vector
- spurious_apic_vector
- x86_platform_ipi_vector
Also, introduce a logic switching IDT at enabling/disabling time so that a time penalty
makes a zero when tracepoints are disabled. Detailed explanations are as follows.
- Create trace irq handlers with entering_irq()/exiting_irq().
- Create a new IDT, trace_idt_table, at boot time by adding a logic to
_set_gate(). It is just a copy of original idt table.
- Register the new handlers for tracpoints to the new IDT by introducing
macros to alloc_intr_gate() called at registering time of irq_vector handlers.
- Add checking, whether irq vector tracing is on/off, into load_current_idt().
This has to be done below debug checking for these reasons.
- Switching to debug IDT may be kicked while tracing is enabled.
- On the other hands, switching to trace IDT is kicked only when debugging
is disabled.
In addition, the new IDT is created only when CONFIG_TRACING is enabled to avoid being
used for other purposes.
Signed-off-by: Seiji Aguchi <seiji.aguchi@hds.com>
Link: http://lkml.kernel.org/r/51C323ED.5050708@hds.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
2013-06-20 22:46:53 +07:00
|
|
|
extern void trace_call_function_interrupt(void);
|
|
|
|
extern void trace_call_function_single_interrupt(void);
|
|
|
|
#define trace_irq_move_cleanup_interrupt irq_move_cleanup_interrupt
|
|
|
|
#define trace_reboot_interrupt reboot_interrupt
|
|
|
|
#define trace_kvm_posted_intr_ipi kvm_posted_intr_ipi
|
2015-05-19 16:07:16 +07:00
|
|
|
#define trace_kvm_posted_intr_wakeup_ipi kvm_posted_intr_wakeup_ipi
|
x86, trace: Add irq vector tracepoints
[Purpose of this patch]
As Vaibhav explained in the thread below, tracepoints for irq vectors
are useful.
http://www.spinics.net/lists/mm-commits/msg85707.html
<snip>
The current interrupt traces from irq_handler_entry and irq_handler_exit
provide when an interrupt is handled. They provide good data about when
the system has switched to kernel space and how it affects the currently
running processes.
There are some IRQ vectors which trigger the system into kernel space,
which are not handled in generic IRQ handlers. Tracing such events gives
us the information about IRQ interaction with other system events.
The trace also tells where the system is spending its time. We want to
know which cores are handling interrupts and how they are affecting other
processes in the system. Also, the trace provides information about when
the cores are idle and which interrupts are changing that state.
<snip>
On the other hand, my usecase is tracing just local timer event and
getting a value of instruction pointer.
I suggested to add an argument local timer event to get instruction pointer before.
But there is another way to get it with external module like systemtap.
So, I don't need to add any argument to irq vector tracepoints now.
[Patch Description]
Vaibhav's patch shared a trace point ,irq_vector_entry/irq_vector_exit, in all events.
But there is an above use case to trace specific irq_vector rather than tracing all events.
In this case, we are concerned about overhead due to unwanted events.
So, add following tracepoints instead of introducing irq_vector_entry/exit.
so that we can enable them independently.
- local_timer_vector
- reschedule_vector
- call_function_vector
- call_function_single_vector
- irq_work_entry_vector
- error_apic_vector
- thermal_apic_vector
- threshold_apic_vector
- spurious_apic_vector
- x86_platform_ipi_vector
Also, introduce a logic switching IDT at enabling/disabling time so that a time penalty
makes a zero when tracepoints are disabled. Detailed explanations are as follows.
- Create trace irq handlers with entering_irq()/exiting_irq().
- Create a new IDT, trace_idt_table, at boot time by adding a logic to
_set_gate(). It is just a copy of original idt table.
- Register the new handlers for tracpoints to the new IDT by introducing
macros to alloc_intr_gate() called at registering time of irq_vector handlers.
- Add checking, whether irq vector tracing is on/off, into load_current_idt().
This has to be done below debug checking for these reasons.
- Switching to debug IDT may be kicked while tracing is enabled.
- On the other hands, switching to trace IDT is kicked only when debugging
is disabled.
In addition, the new IDT is created only when CONFIG_TRACING is enabled to avoid being
used for other purposes.
Signed-off-by: Seiji Aguchi <seiji.aguchi@hds.com>
Link: http://lkml.kernel.org/r/51C323ED.5050708@hds.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
2013-06-20 22:46:53 +07:00
|
|
|
#endif /* CONFIG_TRACING */
|
|
|
|
|
2014-10-27 15:11:56 +07:00
|
|
|
#ifdef CONFIG_X86_LOCAL_APIC
|
2014-10-27 15:12:00 +07:00
|
|
|
struct irq_data;
|
irq_remapping: Introduce new interfaces to support hierarchical irqdomains
Introduce new interfaces for interrupt remapping drivers to support
hierarchical irqdomains:
1) irq_remapping_get_ir_irq_domain(): get irqdomain associated with an
interrupt remapping unit. IOAPIC/HPET drivers use this interface to
get parent interrupt remapping irqdomain.
2) irq_remapping_get_irq_domain(): get irqdomain for an IRQ allocation.
This is mainly used to support MSI irqdomain. We must build one MSI
irqdomain for each interrupt remapping unit. MSI driver calls this
interface to get MSI irqdomain associated with an IR irqdomain which
manages the PCI devices. In a further step we will store the irqdomain
pointer in the device struct to avoid this call in the irq allocation
path.
Architecture specific hooks:
1) arch_get_ir_parent_domain(): get parent irqdomain for IR irqdomain,
which is x86_vector_domain on x86 platforms.
2) arch_create_msi_irq_domain(): create an MSI irqdomain associated with
the interrupt remapping unit.
We also add following callbacks into struct irq_remap_ops:
struct irq_domain *(*get_ir_irq_domain)(struct irq_alloc_info *);
struct irq_domain *(*get_irq_domain)(struct irq_alloc_info *);
Once all clients of IR have been converted to the new hierarchical irqdomain
interfaces, we will:
1) Remove set_ioapic_entry, set_affinity, free_irq, compose_msi_msg,
msi_alloc_irq, msi_setup_irq, setup_hpet_msi from struct remap_osp
2) Remove setup_ioapic_remapped_entry, free_remapped_irq,
compose_remapped_msi_msg, setup_hpet_msi_remapped, setup_remapped_irq.
3) Simplify x86_io_apic_ops and x86_msi.
We can achieve a way clearer architecture with all these changes
applied.
Signed-off-by: Jiang Liu <jiang.liu@linux.intel.com>
Acked-by: Joerg Roedel <jroedel@suse.de>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: David Cohen <david.a.cohen@linux.intel.com>
Cc: Sander Eikelenboom <linux@eikelenboom.it>
Cc: David Vrabel <david.vrabel@citrix.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: iommu@lists.linux-foundation.org
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Rafael J. Wysocki <rjw@rjwysocki.net>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dimitri Sivanich <sivanich@sgi.com>
Cc: Joerg Roedel <joro@8bytes.org>
Link: http://lkml.kernel.org/r/1428905519-23704-9-git-send-email-jiang.liu@linux.intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2015-04-13 13:11:30 +07:00
|
|
|
struct pci_dev;
|
|
|
|
struct msi_desc;
|
|
|
|
|
|
|
|
enum irq_alloc_type {
|
|
|
|
X86_IRQ_ALLOC_TYPE_IOAPIC = 1,
|
|
|
|
X86_IRQ_ALLOC_TYPE_HPET,
|
|
|
|
X86_IRQ_ALLOC_TYPE_MSI,
|
|
|
|
X86_IRQ_ALLOC_TYPE_MSIX,
|
2015-04-13 13:11:42 +07:00
|
|
|
X86_IRQ_ALLOC_TYPE_DMAR,
|
2015-04-13 13:11:44 +07:00
|
|
|
X86_IRQ_ALLOC_TYPE_UV,
|
irq_remapping: Introduce new interfaces to support hierarchical irqdomains
Introduce new interfaces for interrupt remapping drivers to support
hierarchical irqdomains:
1) irq_remapping_get_ir_irq_domain(): get irqdomain associated with an
interrupt remapping unit. IOAPIC/HPET drivers use this interface to
get parent interrupt remapping irqdomain.
2) irq_remapping_get_irq_domain(): get irqdomain for an IRQ allocation.
This is mainly used to support MSI irqdomain. We must build one MSI
irqdomain for each interrupt remapping unit. MSI driver calls this
interface to get MSI irqdomain associated with an IR irqdomain which
manages the PCI devices. In a further step we will store the irqdomain
pointer in the device struct to avoid this call in the irq allocation
path.
Architecture specific hooks:
1) arch_get_ir_parent_domain(): get parent irqdomain for IR irqdomain,
which is x86_vector_domain on x86 platforms.
2) arch_create_msi_irq_domain(): create an MSI irqdomain associated with
the interrupt remapping unit.
We also add following callbacks into struct irq_remap_ops:
struct irq_domain *(*get_ir_irq_domain)(struct irq_alloc_info *);
struct irq_domain *(*get_irq_domain)(struct irq_alloc_info *);
Once all clients of IR have been converted to the new hierarchical irqdomain
interfaces, we will:
1) Remove set_ioapic_entry, set_affinity, free_irq, compose_msi_msg,
msi_alloc_irq, msi_setup_irq, setup_hpet_msi from struct remap_osp
2) Remove setup_ioapic_remapped_entry, free_remapped_irq,
compose_remapped_msi_msg, setup_hpet_msi_remapped, setup_remapped_irq.
3) Simplify x86_io_apic_ops and x86_msi.
We can achieve a way clearer architecture with all these changes
applied.
Signed-off-by: Jiang Liu <jiang.liu@linux.intel.com>
Acked-by: Joerg Roedel <jroedel@suse.de>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: David Cohen <david.a.cohen@linux.intel.com>
Cc: Sander Eikelenboom <linux@eikelenboom.it>
Cc: David Vrabel <david.vrabel@citrix.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: iommu@lists.linux-foundation.org
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Rafael J. Wysocki <rjw@rjwysocki.net>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dimitri Sivanich <sivanich@sgi.com>
Cc: Joerg Roedel <joro@8bytes.org>
Link: http://lkml.kernel.org/r/1428905519-23704-9-git-send-email-jiang.liu@linux.intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2015-04-13 13:11:30 +07:00
|
|
|
};
|
2015-04-13 13:11:24 +07:00
|
|
|
|
|
|
|
struct irq_alloc_info {
|
irq_remapping: Introduce new interfaces to support hierarchical irqdomains
Introduce new interfaces for interrupt remapping drivers to support
hierarchical irqdomains:
1) irq_remapping_get_ir_irq_domain(): get irqdomain associated with an
interrupt remapping unit. IOAPIC/HPET drivers use this interface to
get parent interrupt remapping irqdomain.
2) irq_remapping_get_irq_domain(): get irqdomain for an IRQ allocation.
This is mainly used to support MSI irqdomain. We must build one MSI
irqdomain for each interrupt remapping unit. MSI driver calls this
interface to get MSI irqdomain associated with an IR irqdomain which
manages the PCI devices. In a further step we will store the irqdomain
pointer in the device struct to avoid this call in the irq allocation
path.
Architecture specific hooks:
1) arch_get_ir_parent_domain(): get parent irqdomain for IR irqdomain,
which is x86_vector_domain on x86 platforms.
2) arch_create_msi_irq_domain(): create an MSI irqdomain associated with
the interrupt remapping unit.
We also add following callbacks into struct irq_remap_ops:
struct irq_domain *(*get_ir_irq_domain)(struct irq_alloc_info *);
struct irq_domain *(*get_irq_domain)(struct irq_alloc_info *);
Once all clients of IR have been converted to the new hierarchical irqdomain
interfaces, we will:
1) Remove set_ioapic_entry, set_affinity, free_irq, compose_msi_msg,
msi_alloc_irq, msi_setup_irq, setup_hpet_msi from struct remap_osp
2) Remove setup_ioapic_remapped_entry, free_remapped_irq,
compose_remapped_msi_msg, setup_hpet_msi_remapped, setup_remapped_irq.
3) Simplify x86_io_apic_ops and x86_msi.
We can achieve a way clearer architecture with all these changes
applied.
Signed-off-by: Jiang Liu <jiang.liu@linux.intel.com>
Acked-by: Joerg Roedel <jroedel@suse.de>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: David Cohen <david.a.cohen@linux.intel.com>
Cc: Sander Eikelenboom <linux@eikelenboom.it>
Cc: David Vrabel <david.vrabel@citrix.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: iommu@lists.linux-foundation.org
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Rafael J. Wysocki <rjw@rjwysocki.net>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dimitri Sivanich <sivanich@sgi.com>
Cc: Joerg Roedel <joro@8bytes.org>
Link: http://lkml.kernel.org/r/1428905519-23704-9-git-send-email-jiang.liu@linux.intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2015-04-13 13:11:30 +07:00
|
|
|
enum irq_alloc_type type;
|
2015-04-13 13:11:24 +07:00
|
|
|
u32 flags;
|
|
|
|
const struct cpumask *mask; /* CPU mask for vector allocation */
|
irq_remapping: Introduce new interfaces to support hierarchical irqdomains
Introduce new interfaces for interrupt remapping drivers to support
hierarchical irqdomains:
1) irq_remapping_get_ir_irq_domain(): get irqdomain associated with an
interrupt remapping unit. IOAPIC/HPET drivers use this interface to
get parent interrupt remapping irqdomain.
2) irq_remapping_get_irq_domain(): get irqdomain for an IRQ allocation.
This is mainly used to support MSI irqdomain. We must build one MSI
irqdomain for each interrupt remapping unit. MSI driver calls this
interface to get MSI irqdomain associated with an IR irqdomain which
manages the PCI devices. In a further step we will store the irqdomain
pointer in the device struct to avoid this call in the irq allocation
path.
Architecture specific hooks:
1) arch_get_ir_parent_domain(): get parent irqdomain for IR irqdomain,
which is x86_vector_domain on x86 platforms.
2) arch_create_msi_irq_domain(): create an MSI irqdomain associated with
the interrupt remapping unit.
We also add following callbacks into struct irq_remap_ops:
struct irq_domain *(*get_ir_irq_domain)(struct irq_alloc_info *);
struct irq_domain *(*get_irq_domain)(struct irq_alloc_info *);
Once all clients of IR have been converted to the new hierarchical irqdomain
interfaces, we will:
1) Remove set_ioapic_entry, set_affinity, free_irq, compose_msi_msg,
msi_alloc_irq, msi_setup_irq, setup_hpet_msi from struct remap_osp
2) Remove setup_ioapic_remapped_entry, free_remapped_irq,
compose_remapped_msi_msg, setup_hpet_msi_remapped, setup_remapped_irq.
3) Simplify x86_io_apic_ops and x86_msi.
We can achieve a way clearer architecture with all these changes
applied.
Signed-off-by: Jiang Liu <jiang.liu@linux.intel.com>
Acked-by: Joerg Roedel <jroedel@suse.de>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: David Cohen <david.a.cohen@linux.intel.com>
Cc: Sander Eikelenboom <linux@eikelenboom.it>
Cc: David Vrabel <david.vrabel@citrix.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: iommu@lists.linux-foundation.org
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Rafael J. Wysocki <rjw@rjwysocki.net>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dimitri Sivanich <sivanich@sgi.com>
Cc: Joerg Roedel <joro@8bytes.org>
Link: http://lkml.kernel.org/r/1428905519-23704-9-git-send-email-jiang.liu@linux.intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2015-04-13 13:11:30 +07:00
|
|
|
union {
|
|
|
|
int unused;
|
|
|
|
#ifdef CONFIG_HPET_TIMER
|
|
|
|
struct {
|
|
|
|
int hpet_id;
|
|
|
|
int hpet_index;
|
|
|
|
void *hpet_data;
|
|
|
|
};
|
|
|
|
#endif
|
|
|
|
#ifdef CONFIG_PCI_MSI
|
|
|
|
struct {
|
|
|
|
struct pci_dev *msi_dev;
|
|
|
|
irq_hw_number_t msi_hwirq;
|
|
|
|
};
|
|
|
|
#endif
|
|
|
|
#ifdef CONFIG_X86_IO_APIC
|
|
|
|
struct {
|
|
|
|
int ioapic_id;
|
|
|
|
int ioapic_pin;
|
|
|
|
int ioapic_node;
|
|
|
|
u32 ioapic_trigger : 1;
|
|
|
|
u32 ioapic_polarity : 1;
|
|
|
|
u32 ioapic_valid : 1;
|
|
|
|
struct IO_APIC_route_entry *ioapic_entry;
|
|
|
|
};
|
2015-04-13 13:11:42 +07:00
|
|
|
#endif
|
|
|
|
#ifdef CONFIG_DMAR_TABLE
|
|
|
|
struct {
|
|
|
|
int dmar_id;
|
|
|
|
void *dmar_data;
|
|
|
|
};
|
2015-04-13 13:11:43 +07:00
|
|
|
#endif
|
|
|
|
#ifdef CONFIG_HT_IRQ
|
|
|
|
struct {
|
|
|
|
int ht_pos;
|
|
|
|
int ht_idx;
|
|
|
|
struct pci_dev *ht_dev;
|
|
|
|
void *ht_update;
|
|
|
|
};
|
2015-04-13 13:11:44 +07:00
|
|
|
#endif
|
|
|
|
#ifdef CONFIG_X86_UV
|
|
|
|
struct {
|
|
|
|
int uv_limit;
|
|
|
|
int uv_blade;
|
|
|
|
unsigned long uv_offset;
|
|
|
|
char *uv_name;
|
|
|
|
};
|
x86/PCI: Add driver for Intel Volume Management Device (VMD)
The Intel Volume Management Device (VMD) is a Root Complex Integrated
Endpoint that acts as a host bridge to a secondary PCIe domain. BIOS can
reassign one or more Root Ports to appear within a VMD domain instead of
the primary domain. The immediate benefit is that additional PCIe domains
allow more than 256 buses in a system by letting bus numbers be reused
across different domains.
VMD domains do not define ACPI _SEG, so to avoid domain clashing with host
bridges defining this segment, VMD domains start at 0x10000, which is
greater than the highest possible 16-bit ACPI defined _SEG.
This driver enumerates and enables the domain using the root bus
configuration interface provided by the PCI subsystem. The driver provides
configuration space accessor functions (pci_ops), bus and memory resources,
an MSI IRQ domain with irq_chip implementation, and DMA operations
necessary to use devices through the VMD endpoint's interface.
VMD routes I/O as follows:
1) Configuration Space: BAR 0 ("CFGBAR") of VMD provides the base
address and size for configuration space register access to VMD-owned
root ports. It works similarly to MMCONFIG for extended configuration
space. Bus numbering is independent and does not conflict with the
primary domain.
2) MMIO Space: BARs 2 and 4 ("MEMBAR1" and "MEMBAR2") of VMD provide the
base address, size, and type for MMIO register access. These addresses
are not translated by VMD hardware; they are simply reservations to be
distributed to root ports' memory base/limit registers and subdivided
among devices downstream.
3) DMA: To interact appropriately with an IOMMU, the source ID DMA read
and write requests are translated to the bus-device-function of the VMD
endpoint. Otherwise, DMA operates normally without VMD-specific address
translation.
4) Interrupts: Part of VMD's BAR 4 is reserved for VMD's MSI-X Table and
PBA. MSIs from VMD domain devices and ports are remapped to appear as
if they were issued using one of VMD's MSI-X table entries. Each MSI
and MSI-X address of VMD-owned devices and ports has a special format
where the address refers to specific entries in the VMD's MSI-X table.
As with DMA, the interrupt source ID is translated to VMD's
bus-device-function.
The driver provides its own MSI and MSI-X configuration functions
specific to how MSI messages are used within the VMD domain, and
provides an irq_chip for independent IRQ allocation to relay interrupts
from VMD's interrupt handler to the appropriate device driver's handler.
5) Errors: PCIe error message are intercepted by the root ports normally
(e.g., AER), except with VMD, system errors (i.e., firmware first) are
disabled by default. AER and hotplug interrupts are translated in the
same way as endpoint interrupts.
6) VMD does not support INTx interrupts or IO ports. Devices or drivers
requiring these features should either not be placed below VMD-owned
root ports, or VMD should be disabled by BIOS for such endpoints.
[bhelgaas: add VMD BAR #defines, factor out vmd_cfg_addr(), rework VMD
resource setup, whitespace, changelog]
Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de> (IRQ-related parts)
2016-01-13 03:18:10 +07:00
|
|
|
#endif
|
|
|
|
#if IS_ENABLED(CONFIG_VMD)
|
|
|
|
struct {
|
|
|
|
struct msi_desc *desc;
|
|
|
|
};
|
irq_remapping: Introduce new interfaces to support hierarchical irqdomains
Introduce new interfaces for interrupt remapping drivers to support
hierarchical irqdomains:
1) irq_remapping_get_ir_irq_domain(): get irqdomain associated with an
interrupt remapping unit. IOAPIC/HPET drivers use this interface to
get parent interrupt remapping irqdomain.
2) irq_remapping_get_irq_domain(): get irqdomain for an IRQ allocation.
This is mainly used to support MSI irqdomain. We must build one MSI
irqdomain for each interrupt remapping unit. MSI driver calls this
interface to get MSI irqdomain associated with an IR irqdomain which
manages the PCI devices. In a further step we will store the irqdomain
pointer in the device struct to avoid this call in the irq allocation
path.
Architecture specific hooks:
1) arch_get_ir_parent_domain(): get parent irqdomain for IR irqdomain,
which is x86_vector_domain on x86 platforms.
2) arch_create_msi_irq_domain(): create an MSI irqdomain associated with
the interrupt remapping unit.
We also add following callbacks into struct irq_remap_ops:
struct irq_domain *(*get_ir_irq_domain)(struct irq_alloc_info *);
struct irq_domain *(*get_irq_domain)(struct irq_alloc_info *);
Once all clients of IR have been converted to the new hierarchical irqdomain
interfaces, we will:
1) Remove set_ioapic_entry, set_affinity, free_irq, compose_msi_msg,
msi_alloc_irq, msi_setup_irq, setup_hpet_msi from struct remap_osp
2) Remove setup_ioapic_remapped_entry, free_remapped_irq,
compose_remapped_msi_msg, setup_hpet_msi_remapped, setup_remapped_irq.
3) Simplify x86_io_apic_ops and x86_msi.
We can achieve a way clearer architecture with all these changes
applied.
Signed-off-by: Jiang Liu <jiang.liu@linux.intel.com>
Acked-by: Joerg Roedel <jroedel@suse.de>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: David Cohen <david.a.cohen@linux.intel.com>
Cc: Sander Eikelenboom <linux@eikelenboom.it>
Cc: David Vrabel <david.vrabel@citrix.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: iommu@lists.linux-foundation.org
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Rafael J. Wysocki <rjw@rjwysocki.net>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dimitri Sivanich <sivanich@sgi.com>
Cc: Joerg Roedel <joro@8bytes.org>
Link: http://lkml.kernel.org/r/1428905519-23704-9-git-send-email-jiang.liu@linux.intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2015-04-13 13:11:30 +07:00
|
|
|
#endif
|
|
|
|
};
|
2015-04-13 13:11:24 +07:00
|
|
|
};
|
|
|
|
|
2009-10-14 03:32:36 +07:00
|
|
|
struct irq_cfg {
|
2015-04-13 13:11:23 +07:00
|
|
|
unsigned int dest_apicid;
|
2009-10-14 03:32:36 +07:00
|
|
|
u8 vector;
|
x86/irq: Cure live lock in fixup_irqs()
Harry reported, that he's able to trigger a system freeze with cpu hot
unplug. The freeze turned out to be a live lock caused by recent changes in
irq_force_complete_move().
When fixup_irqs() and from there irq_force_complete_move() is called on the
dying cpu, then all other cpus are in stop machine an wait for the dying cpu
to complete the teardown. If there is a move of an interrupt pending then
irq_force_complete_move() sends the cleanup IPI to the cpus in the old_domain
mask and waits for them to clear the mask. That's obviously impossible as
those cpus are firmly stuck in stop machine with interrupts disabled.
I should have known that, but I completely overlooked it being concentrated on
the locking issues around the vectors. And the existance of the call to
__irq_complete_move() in the code, which actually sends the cleanup IPI made
it reasonable to wait for that cleanup to complete. That call was bogus even
before the recent changes as it was just a pointless distraction.
We have to look at two cases:
1) The move_in_progress flag of the interrupt is set
This means the ioapic has been updated with the new vector, but it has not
fired yet. In theory there is a race:
set_ioapic(new_vector) <-- Interrupt is raised before update is effective,
i.e. it's raised on the old vector.
So if the target cpu cannot handle that interrupt before the old vector is
cleaned up, we get a spurious interrupt and in the worst case the ioapic
irq line becomes stale, but my experiments so far have only resulted in
spurious interrupts.
But in case of cpu hotplug this should be a non issue because if the
affinity update happens right before all cpus rendevouz in stop machine,
there is no way that the interrupt can be blocked on the target cpu because
all cpus loops first with interrupts enabled in stop machine, so the old
vector is not yet cleaned up when the interrupt fires.
So the only way to run into this issue is if the delivery of the interrupt
on the apic/system bus would be delayed beyond the point where the target
cpu disables interrupts in stop machine. I doubt that it can happen, but at
least there is a theroretical chance. Virtualization might be able to
expose this, but AFAICT the IOAPIC emulation is not as stupid as the real
hardware.
I've spent quite some time over the weekend to enforce that situation,
though I was not able to trigger the delayed case.
2) The move_in_progress flag is not set and the old_domain cpu mask is not
empty.
That means, that an interrupt was delivered after the change and the
cleanup IPI has been sent to the cpus in old_domain, but not all CPUs have
responded to it yet.
In both cases we can assume that the next interrupt will arrive on the new
vector, so we can cleanup the old vectors on the cpus in the old_domain cpu
mask.
Fixes: 98229aa36caa "x86/irq: Plug vector cleanup race"
Reported-by: Harry Junior <harryjr@outlook.fr>
Tested-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Joe Lawrence <joe.lawrence@stratus.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Ben Hutchings <ben@decadent.org.uk>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1603140931430.3657@nanos
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2016-03-14 15:40:46 +07:00
|
|
|
u8 old_vector;
|
2009-10-14 03:32:36 +07:00
|
|
|
};
|
|
|
|
|
2014-10-27 15:11:59 +07:00
|
|
|
extern struct irq_cfg *irq_cfg(unsigned int irq);
|
|
|
|
extern struct irq_cfg *irqd_cfg(struct irq_data *irq_data);
|
2014-10-27 15:12:00 +07:00
|
|
|
extern void lock_vector_lock(void);
|
|
|
|
extern void unlock_vector_lock(void);
|
|
|
|
extern void setup_vector_irq(int cpu);
|
2014-10-27 15:12:11 +07:00
|
|
|
#ifdef CONFIG_SMP
|
2009-10-14 03:32:36 +07:00
|
|
|
extern void send_cleanup_vector(struct irq_cfg *);
|
2014-11-05 16:12:27 +07:00
|
|
|
extern void irq_complete_move(struct irq_cfg *cfg);
|
2014-10-27 15:12:11 +07:00
|
|
|
#else
|
|
|
|
static inline void send_cleanup_vector(struct irq_cfg *c) { }
|
2014-11-05 16:12:27 +07:00
|
|
|
static inline void irq_complete_move(struct irq_cfg *c) { }
|
2014-10-27 15:12:11 +07:00
|
|
|
#endif
|
2009-10-14 20:06:42 +07:00
|
|
|
|
2014-10-27 15:12:00 +07:00
|
|
|
extern void apic_ack_edge(struct irq_data *data);
|
|
|
|
#else /* CONFIG_X86_LOCAL_APIC */
|
2014-10-27 15:11:56 +07:00
|
|
|
static inline void lock_vector_lock(void) {}
|
|
|
|
static inline void unlock_vector_lock(void) {}
|
2014-10-27 15:12:00 +07:00
|
|
|
#endif /* CONFIG_X86_LOCAL_APIC */
|
2014-10-27 15:11:56 +07:00
|
|
|
|
2008-05-03 00:00:30 +07:00
|
|
|
/* Statistics */
|
|
|
|
extern atomic_t irq_err_count;
|
|
|
|
extern atomic_t irq_mis_count;
|
|
|
|
|
2015-05-10 07:27:37 +07:00
|
|
|
extern void elcr_set_level_irq(unsigned int irq);
|
2008-05-03 05:30:50 +07:00
|
|
|
|
2015-04-04 02:49:13 +07:00
|
|
|
extern char irq_entries_start[];
|
2013-10-31 03:37:00 +07:00
|
|
|
#ifdef CONFIG_TRACING
|
2015-04-04 02:49:13 +07:00
|
|
|
#define trace_irq_entries_start irq_entries_start
|
2013-10-31 03:37:00 +07:00
|
|
|
#endif
|
2008-08-20 10:50:28 +07:00
|
|
|
|
2015-08-03 03:38:27 +07:00
|
|
|
#define VECTOR_UNUSED NULL
|
|
|
|
#define VECTOR_RETRIGGERED ((void *)~0UL)
|
2014-01-05 23:10:52 +07:00
|
|
|
|
2015-08-03 03:38:27 +07:00
|
|
|
typedef struct irq_desc* vector_irq_t[NR_VECTORS];
|
2008-05-03 03:10:39 +07:00
|
|
|
DECLARE_PER_CPU(vector_irq_t, vector_irq);
|
2008-05-03 00:00:30 +07:00
|
|
|
|
2008-05-03 03:10:39 +07:00
|
|
|
#endif /* !ASSEMBLY_ */
|
|
|
|
|
2008-10-23 12:26:29 +07:00
|
|
|
#endif /* _ASM_X86_HW_IRQ_H */
|