linux_dsm_epyc7002/arch/x86/kernel/acpi/boot.c

1763 lines
43 KiB
C
Raw Normal View History

/*
* boot.c - Architecture-Specific Low-Level ACPI Boot Support
*
* Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
* Copyright (C) 2001 Jun Nakajima <jun.nakajima@intel.com>
*
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
#include <linux/init.h>
#include <linux/acpi.h>
#include <linux/acpi_pmtmr.h>
#include <linux/efi.h>
#include <linux/cpumask.h>
#include <linux/export.h>
#include <linux/dmi.h>
#include <linux/irq.h>
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 15:04:11 +07:00
#include <linux/slab.h>
#include <linux/bootmem.h>
#include <linux/ioport.h>
#include <linux/pci.h>
#include <linux/efi-bgrt.h>
#include <linux/serial_core.h>
#include <asm/e820/api.h>
#include <asm/irqdomain.h>
#include <asm/pci_x86.h>
#include <asm/pgtable.h>
#include <asm/io_apic.h>
#include <asm/apic.h>
#include <asm/io.h>
#include <asm/mpspec.h>
#include <asm/smp.h>
#include <asm/i8259.h>
#include "sleep.h" /* To include x86_acpi_suspend_lowlevel */
static int __initdata acpi_force = 0;
int acpi_disabled;
EXPORT_SYMBOL(acpi_disabled);
#ifdef CONFIG_X86_64
# include <asm/proto.h>
#endif /* X86 */
#define PREFIX "ACPI: "
int acpi_noirq; /* skip ACPI IRQ initialization */
int acpi_pci_disabled; /* skip ACPI PCI scan and IRQ initialization */
EXPORT_SYMBOL(acpi_pci_disabled);
int acpi_lapic;
int acpi_ioapic;
int acpi_strict;
int acpi_disable_cmcff;
/* ACPI SCI override configuration */
u8 acpi_sci_flags __initdata;
u32 acpi_sci_override_gsi __initdata = INVALID_ACPI_IRQ;
int acpi_skip_timer_override __initdata;
int acpi_use_timer_override __initdata;
int acpi_fix_pin2_polarity __initdata;
#ifdef CONFIG_X86_LOCAL_APIC
static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
#endif
#ifdef CONFIG_X86_IO_APIC
/*
* Locks related to IOAPIC hotplug
* Hotplug side:
* ->device_hotplug_lock
* ->acpi_ioapic_lock
* ->ioapic_lock
* Interrupt mapping side:
* ->acpi_ioapic_lock
* ->ioapic_mutex
* ->ioapic_lock
*/
static DEFINE_MUTEX(acpi_ioapic_lock);
#endif
/* --------------------------------------------------------------------------
Boot-time Configuration
-------------------------------------------------------------------------- */
/*
* The default interrupt routing model is PIC (8259). This gets
* overridden if IOAPICs are enumerated (below).
*/
enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_PIC;
/*
* ISA irqs by default are the first 16 gsis but can be
* any gsi as specified by an interrupt source override.
*/
static u32 isa_irq_to_gsi[NR_IRQS_LEGACY] __read_mostly = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};
/*
x86, mpparse, x86/acpi, x86/PCI, x86/dmi, SFI: Use memremap() for RAM mappings The ioremap() function is intended for mapping MMIO. For RAM, the memremap() function should be used. Convert calls from ioremap() to memremap() when re-mapping RAM. This will be used later by SME to control how the encryption mask is applied to memory mappings, with certain memory locations being mapped decrypted vs encrypted. Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com> Reviewed-by: Thomas Gleixner <tglx@linutronix.de> Reviewed-by: Borislav Petkov <bp@suse.de> Cc: Alexander Potapenko <glider@google.com> Cc: Andrey Ryabinin <aryabinin@virtuozzo.com> Cc: Andy Lutomirski <luto@kernel.org> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Borislav Petkov <bp@alien8.de> Cc: Brijesh Singh <brijesh.singh@amd.com> Cc: Dave Young <dyoung@redhat.com> Cc: Dmitry Vyukov <dvyukov@google.com> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Cc: Larry Woodman <lwoodman@redhat.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Matt Fleming <matt@codeblueprint.co.uk> Cc: Michael S. Tsirkin <mst@redhat.com> Cc: Paolo Bonzini <pbonzini@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Radim Krčmář <rkrcmar@redhat.com> Cc: Rik van Riel <riel@redhat.com> Cc: Toshimitsu Kani <toshi.kani@hpe.com> Cc: kasan-dev@googlegroups.com Cc: kvm@vger.kernel.org Cc: linux-arch@vger.kernel.org Cc: linux-doc@vger.kernel.org Cc: linux-efi@vger.kernel.org Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/b13fccb9abbd547a7eef7b1fdfc223431b211c88.1500319216.git.thomas.lendacky@amd.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
2017-07-18 04:10:00 +07:00
* This is just a simple wrapper around early_memremap(),
* with sanity checks for phys == 0 and size == 0.
*/
void __init __iomem *__acpi_map_table(unsigned long phys, unsigned long size)
{
if (!phys || !size)
return NULL;
x86, mpparse, x86/acpi, x86/PCI, x86/dmi, SFI: Use memremap() for RAM mappings The ioremap() function is intended for mapping MMIO. For RAM, the memremap() function should be used. Convert calls from ioremap() to memremap() when re-mapping RAM. This will be used later by SME to control how the encryption mask is applied to memory mappings, with certain memory locations being mapped decrypted vs encrypted. Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com> Reviewed-by: Thomas Gleixner <tglx@linutronix.de> Reviewed-by: Borislav Petkov <bp@suse.de> Cc: Alexander Potapenko <glider@google.com> Cc: Andrey Ryabinin <aryabinin@virtuozzo.com> Cc: Andy Lutomirski <luto@kernel.org> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Borislav Petkov <bp@alien8.de> Cc: Brijesh Singh <brijesh.singh@amd.com> Cc: Dave Young <dyoung@redhat.com> Cc: Dmitry Vyukov <dvyukov@google.com> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Cc: Larry Woodman <lwoodman@redhat.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Matt Fleming <matt@codeblueprint.co.uk> Cc: Michael S. Tsirkin <mst@redhat.com> Cc: Paolo Bonzini <pbonzini@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Radim Krčmář <rkrcmar@redhat.com> Cc: Rik van Riel <riel@redhat.com> Cc: Toshimitsu Kani <toshi.kani@hpe.com> Cc: kasan-dev@googlegroups.com Cc: kvm@vger.kernel.org Cc: linux-arch@vger.kernel.org Cc: linux-doc@vger.kernel.org Cc: linux-efi@vger.kernel.org Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/b13fccb9abbd547a7eef7b1fdfc223431b211c88.1500319216.git.thomas.lendacky@amd.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
2017-07-18 04:10:00 +07:00
return early_memremap(phys, size);
}
void __init __acpi_unmap_table(void __iomem *map, unsigned long size)
{
if (!map || !size)
return;
x86, mpparse, x86/acpi, x86/PCI, x86/dmi, SFI: Use memremap() for RAM mappings The ioremap() function is intended for mapping MMIO. For RAM, the memremap() function should be used. Convert calls from ioremap() to memremap() when re-mapping RAM. This will be used later by SME to control how the encryption mask is applied to memory mappings, with certain memory locations being mapped decrypted vs encrypted. Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com> Reviewed-by: Thomas Gleixner <tglx@linutronix.de> Reviewed-by: Borislav Petkov <bp@suse.de> Cc: Alexander Potapenko <glider@google.com> Cc: Andrey Ryabinin <aryabinin@virtuozzo.com> Cc: Andy Lutomirski <luto@kernel.org> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Borislav Petkov <bp@alien8.de> Cc: Brijesh Singh <brijesh.singh@amd.com> Cc: Dave Young <dyoung@redhat.com> Cc: Dmitry Vyukov <dvyukov@google.com> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Cc: Larry Woodman <lwoodman@redhat.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Matt Fleming <matt@codeblueprint.co.uk> Cc: Michael S. Tsirkin <mst@redhat.com> Cc: Paolo Bonzini <pbonzini@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Radim Krčmář <rkrcmar@redhat.com> Cc: Rik van Riel <riel@redhat.com> Cc: Toshimitsu Kani <toshi.kani@hpe.com> Cc: kasan-dev@googlegroups.com Cc: kvm@vger.kernel.org Cc: linux-arch@vger.kernel.org Cc: linux-doc@vger.kernel.org Cc: linux-efi@vger.kernel.org Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/b13fccb9abbd547a7eef7b1fdfc223431b211c88.1500319216.git.thomas.lendacky@amd.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
2017-07-18 04:10:00 +07:00
early_memunmap(map, size);
}
#ifdef CONFIG_X86_LOCAL_APIC
static int __init acpi_parse_madt(struct acpi_table_header *table)
{
struct acpi_table_madt *madt = NULL;
if (!boot_cpu_has(X86_FEATURE_APIC))
return -EINVAL;
madt = (struct acpi_table_madt *)table;
if (!madt) {
printk(KERN_WARNING PREFIX "Unable to map MADT\n");
return -ENODEV;
}
if (madt->address) {
acpi_lapic_addr = (u64) madt->address;
printk(KERN_DEBUG PREFIX "Local APIC address 0x%08x\n",
madt->address);
}
default_acpi_madt_oem_check(madt->header.oem_id,
madt->header.oem_table_id);
return 0;
}
/**
* acpi_register_lapic - register a local apic and generates a logic cpu number
* @id: local apic id to register
* @acpiid: ACPI id to register
* @enabled: this cpu is enabled or not
*
* Returns the logic cpu number which maps to the local apic
*/
static int acpi_register_lapic(int id, u32 acpiid, u8 enabled)
{
unsigned int ver = 0;
int cpu;
if (id >= MAX_LOCAL_APIC) {
x86, acpi: Parse all SRAT cpu entries even above the cpu number limitation Recent Intel new system have different order in MADT, aka will list all thread0 at first, then all thread1. But SRAT table still old order, it will list cpus in one socket all together. If the user have compiled limited NR_CPUS or boot with nr_cpus=, could have missed to put some cpus apic id to node mapping into apicid_to_node[]. for example for 4 sockets system with 64 cpus with nr_cpus=32 will get crash... [ 9.106288] Total of 32 processors activated (136190.88 BogoMIPS). [ 9.235021] divide error: 0000 [#1] SMP [ 9.235315] last sysfs file: [ 9.235481] CPU 1 [ 9.235592] Modules linked in: [ 9.245398] [ 9.245478] Pid: 2, comm: kthreadd Not tainted 2.6.37-rc1-tip-yh-01782-ge92ef79-dirty #274 /Sun Fire x4800 [ 9.265415] RIP: 0010:[<ffffffff81075a8f>] [<ffffffff81075a8f>] select_task_rq_fair+0x4f0/0x623 ... [ 9.645938] RIP [<ffffffff81075a8f>] select_task_rq_fair+0x4f0/0x623 [ 9.665356] RSP <ffff88103f8d1c40> [ 9.665568] ---[ end trace 2296156d35fdfc87 ]--- So let just parse all cpu entries in SRAT. Also add apicid checking with MAX_LOCAL_APIC, in case We could out of boundaries of apicid_to_node[]. it fixes following bug too. https://bugzilla.kernel.org/show_bug.cgi?id=22662 -v2: expand to 32bit according to hpa need to add MAX_LOCAL_APIC for 32bit Reported-and-Tested-by: Wu Fengguang <fengguang.wu@intel.com> Reported-by: Bjorn Helgaas <bjorn.helgaas@hp.com> Tested-by: Myron Stowe <myron.stowe@hp.com> Signed-off-by: Yinghai Lu <yinghai@kernel.org> LKML-Reference: <4D0AD486.9020704@kernel.org> Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
2010-12-17 10:09:58 +07:00
printk(KERN_INFO PREFIX "skipped apicid that is too big\n");
return -EINVAL;
x86, acpi: Parse all SRAT cpu entries even above the cpu number limitation Recent Intel new system have different order in MADT, aka will list all thread0 at first, then all thread1. But SRAT table still old order, it will list cpus in one socket all together. If the user have compiled limited NR_CPUS or boot with nr_cpus=, could have missed to put some cpus apic id to node mapping into apicid_to_node[]. for example for 4 sockets system with 64 cpus with nr_cpus=32 will get crash... [ 9.106288] Total of 32 processors activated (136190.88 BogoMIPS). [ 9.235021] divide error: 0000 [#1] SMP [ 9.235315] last sysfs file: [ 9.235481] CPU 1 [ 9.235592] Modules linked in: [ 9.245398] [ 9.245478] Pid: 2, comm: kthreadd Not tainted 2.6.37-rc1-tip-yh-01782-ge92ef79-dirty #274 /Sun Fire x4800 [ 9.265415] RIP: 0010:[<ffffffff81075a8f>] [<ffffffff81075a8f>] select_task_rq_fair+0x4f0/0x623 ... [ 9.645938] RIP [<ffffffff81075a8f>] select_task_rq_fair+0x4f0/0x623 [ 9.665356] RSP <ffff88103f8d1c40> [ 9.665568] ---[ end trace 2296156d35fdfc87 ]--- So let just parse all cpu entries in SRAT. Also add apicid checking with MAX_LOCAL_APIC, in case We could out of boundaries of apicid_to_node[]. it fixes following bug too. https://bugzilla.kernel.org/show_bug.cgi?id=22662 -v2: expand to 32bit according to hpa need to add MAX_LOCAL_APIC for 32bit Reported-and-Tested-by: Wu Fengguang <fengguang.wu@intel.com> Reported-by: Bjorn Helgaas <bjorn.helgaas@hp.com> Tested-by: Myron Stowe <myron.stowe@hp.com> Signed-off-by: Yinghai Lu <yinghai@kernel.org> LKML-Reference: <4D0AD486.9020704@kernel.org> Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
2010-12-17 10:09:58 +07:00
}
if (!enabled) {
++disabled_cpus;
return -EINVAL;
}
if (boot_cpu_physical_apicid != -1U)
ver = boot_cpu_apic_version;
cpu = generic_processor_info(id, ver);
if (cpu >= 0)
early_per_cpu(x86_cpu_to_acpiid, cpu) = acpiid;
return cpu;
}
static int __init
acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end)
{
struct acpi_madt_local_x2apic *processor = NULL;
#ifdef CONFIG_X86_X2APIC
int apic_id;
u8 enabled;
#endif
processor = (struct acpi_madt_local_x2apic *)header;
if (BAD_MADT_ENTRY(processor, end))
return -EINVAL;
acpi_table_print_madt_entry(header);
#ifdef CONFIG_X86_X2APIC
apic_id = processor->local_apic_id;
enabled = processor->lapic_flags & ACPI_MADT_ENABLED;
/*
* We need to register disabled CPU as well to permit
* counting disabled CPUs. This allows us to size
* cpus_possible_map more accurately, to permit
* to not preallocating memory for all NR_CPUS
* when we use CPU hotplug.
*/
if (!apic->apic_id_valid(apic_id) && enabled)
printk(KERN_WARNING PREFIX "x2apic entry ignored\n");
else
acpi_register_lapic(apic_id, processor->uid, enabled);
#else
printk(KERN_WARNING PREFIX "x2apic entry ignored\n");
#endif
return 0;
}
static int __init
acpi_parse_lapic(struct acpi_subtable_header * header, const unsigned long end)
{
struct acpi_madt_local_apic *processor = NULL;
processor = (struct acpi_madt_local_apic *)header;
if (BAD_MADT_ENTRY(processor, end))
return -EINVAL;
acpi_table_print_madt_entry(header);
/* Ignore invalid ID */
if (processor->id == 0xff)
return 0;
/*
* We need to register disabled CPU as well to permit
* counting disabled CPUs. This allows us to size
* cpus_possible_map more accurately, to permit
* to not preallocating memory for all NR_CPUS
* when we use CPU hotplug.
*/
acpi_register_lapic(processor->id, /* APIC ID */
processor->processor_id, /* ACPI ID */
processor->lapic_flags & ACPI_MADT_ENABLED);
return 0;
}
static int __init
acpi_parse_sapic(struct acpi_subtable_header *header, const unsigned long end)
{
struct acpi_madt_local_sapic *processor = NULL;
processor = (struct acpi_madt_local_sapic *)header;
if (BAD_MADT_ENTRY(processor, end))
return -EINVAL;
acpi_table_print_madt_entry(header);
acpi_register_lapic((processor->id << 8) | processor->eid,/* APIC ID */
processor->processor_id, /* ACPI ID */
processor->lapic_flags & ACPI_MADT_ENABLED);
return 0;
}
static int __init
acpi_parse_lapic_addr_ovr(struct acpi_subtable_header * header,
const unsigned long end)
{
struct acpi_madt_local_apic_override *lapic_addr_ovr = NULL;
lapic_addr_ovr = (struct acpi_madt_local_apic_override *)header;
if (BAD_MADT_ENTRY(lapic_addr_ovr, end))
return -EINVAL;
acpi_table_print_madt_entry(header);
acpi_lapic_addr = lapic_addr_ovr->address;
return 0;
}
static int __init
acpi_parse_x2apic_nmi(struct acpi_subtable_header *header,
const unsigned long end)
{
struct acpi_madt_local_x2apic_nmi *x2apic_nmi = NULL;
x2apic_nmi = (struct acpi_madt_local_x2apic_nmi *)header;
if (BAD_MADT_ENTRY(x2apic_nmi, end))
return -EINVAL;
acpi_table_print_madt_entry(header);
if (x2apic_nmi->lint != 1)
printk(KERN_WARNING PREFIX "NMI not connected to LINT 1!\n");
return 0;
}
static int __init
acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long end)
{
struct acpi_madt_local_apic_nmi *lapic_nmi = NULL;
lapic_nmi = (struct acpi_madt_local_apic_nmi *)header;
if (BAD_MADT_ENTRY(lapic_nmi, end))
return -EINVAL;
acpi_table_print_madt_entry(header);
if (lapic_nmi->lint != 1)
printk(KERN_WARNING PREFIX "NMI not connected to LINT 1!\n");
return 0;
}
#endif /*CONFIG_X86_LOCAL_APIC */
#ifdef CONFIG_X86_IO_APIC
#define MP_ISA_BUS 0
static int __init mp_register_ioapic_irq(u8 bus_irq, u8 polarity,
u8 trigger, u32 gsi);
static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
u32 gsi)
{
/*
* Check bus_irq boundary.
*/
if (bus_irq >= NR_IRQS_LEGACY) {
pr_warn("Invalid bus_irq %u for legacy override\n", bus_irq);
return;
}
/*
* TBD: This check is for faulty timer entries, where the override
* erroneously sets the trigger to level, resulting in a HUGE
* increase of timer interrupts!
*/
if ((bus_irq == 0) && (trigger == 3))
trigger = 1;
if (mp_register_ioapic_irq(bus_irq, polarity, trigger, gsi) < 0)
return;
/*
* Reset default identity mapping if gsi is also an legacy IRQ,
* otherwise there will be more than one entry with the same GSI
* and acpi_isa_irq_to_gsi() may give wrong result.
*/
if (gsi < nr_legacy_irqs() && isa_irq_to_gsi[gsi] == gsi)
isa_irq_to_gsi[gsi] = INVALID_ACPI_IRQ;
isa_irq_to_gsi[bus_irq] = gsi;
}
static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger,
int polarity)
{
#ifdef CONFIG_X86_MPPARSE
struct mpc_intsrc mp_irq;
struct pci_dev *pdev;
unsigned char number;
unsigned int devfn;
int ioapic;
u8 pin;
if (!acpi_ioapic)
return 0;
if (!dev || !dev_is_pci(dev))
return 0;
pdev = to_pci_dev(dev);
number = pdev->bus->number;
devfn = pdev->devfn;
pin = pdev->pin;
/* print the entry should happen on mptable identically */
mp_irq.type = MP_INTSRC;
mp_irq.irqtype = mp_INT;
mp_irq.irqflag = (trigger == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) |
(polarity == ACPI_ACTIVE_HIGH ? 1 : 3);
mp_irq.srcbus = number;
mp_irq.srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3);
ioapic = mp_find_ioapic(gsi);
mp_irq.dstapic = mpc_ioapic_id(ioapic);
mp_irq.dstirq = mp_find_ioapic_pin(ioapic, gsi);
mp_save_irq(&mp_irq);
#endif
return 0;
}
static int __init mp_register_ioapic_irq(u8 bus_irq, u8 polarity,
u8 trigger, u32 gsi)
{
struct mpc_intsrc mp_irq;
int ioapic, pin;
/* Convert 'gsi' to 'ioapic.pin'(INTIN#) */
ioapic = mp_find_ioapic(gsi);
if (ioapic < 0) {
pr_warn("Failed to find ioapic for gsi : %u\n", gsi);
return ioapic;
}
pin = mp_find_ioapic_pin(ioapic, gsi);
mp_irq.type = MP_INTSRC;
mp_irq.irqtype = mp_INT;
mp_irq.irqflag = (trigger << 2) | polarity;
mp_irq.srcbus = MP_ISA_BUS;
mp_irq.srcbusirq = bus_irq;
mp_irq.dstapic = mpc_ioapic_id(ioapic);
mp_irq.dstirq = pin;
mp_save_irq(&mp_irq);
return 0;
}
static int __init
acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end)
{
struct acpi_madt_io_apic *ioapic = NULL;
struct ioapic_domain_cfg cfg = {
.type = IOAPIC_DOMAIN_DYNAMIC,
.ops = &mp_ioapic_irqdomain_ops,
};
ioapic = (struct acpi_madt_io_apic *)header;
if (BAD_MADT_ENTRY(ioapic, end))
return -EINVAL;
acpi_table_print_madt_entry(header);
/* Statically assign IRQ numbers for IOAPICs hosting legacy IRQs */
if (ioapic->global_irq_base < nr_legacy_irqs())
cfg.type = IOAPIC_DOMAIN_LEGACY;
mp_register_ioapic(ioapic->id, ioapic->address, ioapic->global_irq_base,
&cfg);
return 0;
}
/*
* Parse Interrupt Source Override for the ACPI SCI
*/
static void __init acpi_sci_ioapic_setup(u8 bus_irq, u16 polarity, u16 trigger, u32 gsi)
{
if (trigger == 0) /* compatible SCI trigger is level */
trigger = 3;
if (polarity == 0) /* compatible SCI polarity is low */
polarity = 3;
/* Command-line over-ride via acpi_sci= */
if (acpi_sci_flags & ACPI_MADT_TRIGGER_MASK)
trigger = (acpi_sci_flags & ACPI_MADT_TRIGGER_MASK) >> 2;
if (acpi_sci_flags & ACPI_MADT_POLARITY_MASK)
polarity = acpi_sci_flags & ACPI_MADT_POLARITY_MASK;
if (bus_irq < NR_IRQS_LEGACY)
mp_override_legacy_irq(bus_irq, polarity, trigger, gsi);
else
mp_register_ioapic_irq(bus_irq, polarity, trigger, gsi);
acpi_penalize_sci_irq(bus_irq, trigger, polarity);
/*
* stash over-ride to indicate we've been here
* and for later update of acpi_gbl_FADT
*/
acpi_sci_override_gsi = gsi;
return;
}
static int __init
acpi_parse_int_src_ovr(struct acpi_subtable_header * header,
const unsigned long end)
{
struct acpi_madt_interrupt_override *intsrc = NULL;
intsrc = (struct acpi_madt_interrupt_override *)header;
if (BAD_MADT_ENTRY(intsrc, end))
return -EINVAL;
acpi_table_print_madt_entry(header);
if (intsrc->source_irq == acpi_gbl_FADT.sci_interrupt) {
acpi_sci_ioapic_setup(intsrc->source_irq,
intsrc->inti_flags & ACPI_MADT_POLARITY_MASK,
(intsrc->inti_flags & ACPI_MADT_TRIGGER_MASK) >> 2,
intsrc->global_irq);
return 0;
}
if (intsrc->source_irq == 0) {
if (acpi_skip_timer_override) {
printk(PREFIX "BIOS IRQ0 override ignored.\n");
return 0;
}
if ((intsrc->global_irq == 2) && acpi_fix_pin2_polarity
&& (intsrc->inti_flags & ACPI_MADT_POLARITY_MASK)) {
intsrc->inti_flags &= ~ACPI_MADT_POLARITY_MASK;
printk(PREFIX "BIOS IRQ0 pin2 override: forcing polarity to high active.\n");
}
}
mp_override_legacy_irq(intsrc->source_irq,
intsrc->inti_flags & ACPI_MADT_POLARITY_MASK,
(intsrc->inti_flags & ACPI_MADT_TRIGGER_MASK) >> 2,
intsrc->global_irq);
return 0;
}
static int __init
acpi_parse_nmi_src(struct acpi_subtable_header * header, const unsigned long end)
{
struct acpi_madt_nmi_source *nmi_src = NULL;
nmi_src = (struct acpi_madt_nmi_source *)header;
if (BAD_MADT_ENTRY(nmi_src, end))
return -EINVAL;
acpi_table_print_madt_entry(header);
/* TBD: Support nimsrc entries? */
return 0;
}
#endif /* CONFIG_X86_IO_APIC */
/*
* acpi_pic_sci_set_trigger()
*
* use ELCR to set PIC-mode trigger type for SCI
*
* If a PIC-mode SCI is not recognized or gives spurious IRQ7's
* it may require Edge Trigger -- use "acpi_sci=edge"
*
* Port 0x4d0-4d1 are ECLR1 and ECLR2, the Edge/Level Control Registers
* for the 8259 PIC. bit[n] = 1 means irq[n] is Level, otherwise Edge.
* ECLR1 is IRQs 0-7 (IRQ 0, 1, 2 must be 0)
* ECLR2 is IRQs 8-15 (IRQ 8, 13 must be 0)
*/
void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger)
{
unsigned int mask = 1 << irq;
unsigned int old, new;
/* Real old ELCR mask */
old = inb(0x4d0) | (inb(0x4d1) << 8);
/*
* If we use ACPI to set PCI IRQs, then we should clear ELCR
* since we will set it correctly as we enable the PCI irq
* routing.
*/
new = acpi_noirq ? old : 0;
/*
* Update SCI information in the ELCR, it isn't in the PCI
* routing tables..
*/
switch (trigger) {
case 1: /* Edge - clear */
new &= ~mask;
break;
case 3: /* Level - set */
new |= mask;
break;
}
if (old == new)
return;
printk(PREFIX "setting ELCR to %04x (from %04x)\n", new, old);
outb(new, 0x4d0);
outb(new >> 8, 0x4d1);
}
int acpi_gsi_to_irq(u32 gsi, unsigned int *irqp)
{
x86/xen: Treat SCI interrupt as normal GSI interrupt Currently Xen Domain0 has special treatment for ACPI SCI interrupt, that is initialize irq for ACPI SCI at early stage in a special way as: xen_init_IRQ() ->pci_xen_initial_domain() ->xen_setup_acpi_sci() Allocate and initialize irq for ACPI SCI Function xen_setup_acpi_sci() calls acpi_gsi_to_irq() to get an irq number for ACPI SCI. But unfortunately acpi_gsi_to_irq() depends on IOAPIC irqdomains through following path acpi_gsi_to_irq() ->mp_map_gsi_to_irq() ->mp_map_pin_to_irq() ->check IOAPIC irqdomain For PV domains, it uses Xen event based interrupt manangement and doesn't make uses of native IOAPIC, so no irqdomains created for IOAPIC. This causes Xen domain0 fail to install interrupt handler for ACPI SCI and all ACPI events will be lost. Please refer to: https://lkml.org/lkml/2014/12/19/178 So the fix is to get rid of special treatment for ACPI SCI, just treat ACPI SCI as normal GSI interrupt as: acpi_gsi_to_irq() ->acpi_register_gsi() ->acpi_register_gsi_xen() ->xen_register_gsi() With above change, there's no need for xen_setup_acpi_sci() anymore. The above change also works with bare metal kernel too. Signed-off-by: Jiang Liu <jiang.liu@linux.intel.com> Tested-by: Sander Eikelenboom <linux@eikelenboom.it> Cc: Tony Luck <tony.luck@intel.com> Cc: xen-devel@lists.xenproject.org Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Cc: David Vrabel <david.vrabel@citrix.com> Cc: Rafael J. Wysocki <rjw@rjwysocki.net> Cc: Len Brown <len.brown@intel.com> Cc: Pavel Machek <pavel@ucw.cz> Cc: Bjorn Helgaas <bhelgaas@google.com> Link: http://lkml.kernel.org/r/1421720467-7709-2-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2015-01-20 09:21:05 +07:00
int rc, irq, trigger, polarity;
if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) {
*irqp = gsi;
return 0;
}
x86/xen: Treat SCI interrupt as normal GSI interrupt Currently Xen Domain0 has special treatment for ACPI SCI interrupt, that is initialize irq for ACPI SCI at early stage in a special way as: xen_init_IRQ() ->pci_xen_initial_domain() ->xen_setup_acpi_sci() Allocate and initialize irq for ACPI SCI Function xen_setup_acpi_sci() calls acpi_gsi_to_irq() to get an irq number for ACPI SCI. But unfortunately acpi_gsi_to_irq() depends on IOAPIC irqdomains through following path acpi_gsi_to_irq() ->mp_map_gsi_to_irq() ->mp_map_pin_to_irq() ->check IOAPIC irqdomain For PV domains, it uses Xen event based interrupt manangement and doesn't make uses of native IOAPIC, so no irqdomains created for IOAPIC. This causes Xen domain0 fail to install interrupt handler for ACPI SCI and all ACPI events will be lost. Please refer to: https://lkml.org/lkml/2014/12/19/178 So the fix is to get rid of special treatment for ACPI SCI, just treat ACPI SCI as normal GSI interrupt as: acpi_gsi_to_irq() ->acpi_register_gsi() ->acpi_register_gsi_xen() ->xen_register_gsi() With above change, there's no need for xen_setup_acpi_sci() anymore. The above change also works with bare metal kernel too. Signed-off-by: Jiang Liu <jiang.liu@linux.intel.com> Tested-by: Sander Eikelenboom <linux@eikelenboom.it> Cc: Tony Luck <tony.luck@intel.com> Cc: xen-devel@lists.xenproject.org Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Cc: David Vrabel <david.vrabel@citrix.com> Cc: Rafael J. Wysocki <rjw@rjwysocki.net> Cc: Len Brown <len.brown@intel.com> Cc: Pavel Machek <pavel@ucw.cz> Cc: Bjorn Helgaas <bhelgaas@google.com> Link: http://lkml.kernel.org/r/1421720467-7709-2-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2015-01-20 09:21:05 +07:00
rc = acpi_get_override_irq(gsi, &trigger, &polarity);
if (rc)
return rc;
x86/xen: Treat SCI interrupt as normal GSI interrupt Currently Xen Domain0 has special treatment for ACPI SCI interrupt, that is initialize irq for ACPI SCI at early stage in a special way as: xen_init_IRQ() ->pci_xen_initial_domain() ->xen_setup_acpi_sci() Allocate and initialize irq for ACPI SCI Function xen_setup_acpi_sci() calls acpi_gsi_to_irq() to get an irq number for ACPI SCI. But unfortunately acpi_gsi_to_irq() depends on IOAPIC irqdomains through following path acpi_gsi_to_irq() ->mp_map_gsi_to_irq() ->mp_map_pin_to_irq() ->check IOAPIC irqdomain For PV domains, it uses Xen event based interrupt manangement and doesn't make uses of native IOAPIC, so no irqdomains created for IOAPIC. This causes Xen domain0 fail to install interrupt handler for ACPI SCI and all ACPI events will be lost. Please refer to: https://lkml.org/lkml/2014/12/19/178 So the fix is to get rid of special treatment for ACPI SCI, just treat ACPI SCI as normal GSI interrupt as: acpi_gsi_to_irq() ->acpi_register_gsi() ->acpi_register_gsi_xen() ->xen_register_gsi() With above change, there's no need for xen_setup_acpi_sci() anymore. The above change also works with bare metal kernel too. Signed-off-by: Jiang Liu <jiang.liu@linux.intel.com> Tested-by: Sander Eikelenboom <linux@eikelenboom.it> Cc: Tony Luck <tony.luck@intel.com> Cc: xen-devel@lists.xenproject.org Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Cc: David Vrabel <david.vrabel@citrix.com> Cc: Rafael J. Wysocki <rjw@rjwysocki.net> Cc: Len Brown <len.brown@intel.com> Cc: Pavel Machek <pavel@ucw.cz> Cc: Bjorn Helgaas <bhelgaas@google.com> Link: http://lkml.kernel.org/r/1421720467-7709-2-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2015-01-20 09:21:05 +07:00
trigger = trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE;
polarity = polarity ? ACPI_ACTIVE_LOW : ACPI_ACTIVE_HIGH;
irq = acpi_register_gsi(NULL, gsi, trigger, polarity);
if (irq < 0)
return irq;
*irqp = irq;
return 0;
}
EXPORT_SYMBOL_GPL(acpi_gsi_to_irq);
int acpi_isa_irq_to_gsi(unsigned isa_irq, u32 *gsi)
{
if (isa_irq < nr_legacy_irqs() &&
isa_irq_to_gsi[isa_irq] != INVALID_ACPI_IRQ) {
*gsi = isa_irq_to_gsi[isa_irq];
return 0;
}
return -1;
}
static int acpi_register_gsi_pic(struct device *dev, u32 gsi,
int trigger, int polarity)
{
#ifdef CONFIG_PCI
/*
* Make sure all (legacy) PCI IRQs are set as level-triggered.
*/
if (trigger == ACPI_LEVEL_SENSITIVE)
elcr_set_level_irq(gsi);
#endif
return gsi;
}
#ifdef CONFIG_X86_LOCAL_APIC
static int acpi_register_gsi_ioapic(struct device *dev, u32 gsi,
int trigger, int polarity)
{
int irq = gsi;
#ifdef CONFIG_X86_IO_APIC
int node;
struct irq_alloc_info info;
node = dev ? dev_to_node(dev) : NUMA_NO_NODE;
trigger = trigger == ACPI_EDGE_SENSITIVE ? 0 : 1;
polarity = polarity == ACPI_ACTIVE_HIGH ? 0 : 1;
ioapic_set_alloc_attr(&info, node, trigger, polarity);
mutex_lock(&acpi_ioapic_lock);
irq = mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC, &info);
/* Don't set up the ACPI SCI because it's already set up */
if (irq >= 0 && enable_update_mptable && gsi != acpi_gbl_FADT.sci_interrupt)
mp_config_acpi_gsi(dev, gsi, trigger, polarity);
mutex_unlock(&acpi_ioapic_lock);
#endif
return irq;
}
static void acpi_unregister_gsi_ioapic(u32 gsi)
{
#ifdef CONFIG_X86_IO_APIC
int irq;
mutex_lock(&acpi_ioapic_lock);
irq = mp_map_gsi_to_irq(gsi, 0, NULL);
if (irq > 0)
mp_unmap_irq(irq);
mutex_unlock(&acpi_ioapic_lock);
#endif
}
#endif
int (*__acpi_register_gsi)(struct device *dev, u32 gsi,
int trigger, int polarity) = acpi_register_gsi_pic;
void (*__acpi_unregister_gsi)(u32 gsi) = NULL;
#ifdef CONFIG_ACPI_SLEEP
int (*acpi_suspend_lowlevel)(void) = x86_acpi_suspend_lowlevel;
#else
int (*acpi_suspend_lowlevel)(void);
#endif
/*
* success: return IRQ number (>=0)
* failure: return < 0
*/
int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity)
{
return __acpi_register_gsi(dev, gsi, trigger, polarity);
}
EXPORT_SYMBOL_GPL(acpi_register_gsi);
void acpi_unregister_gsi(u32 gsi)
{
if (__acpi_unregister_gsi)
__acpi_unregister_gsi(gsi);
}
EXPORT_SYMBOL_GPL(acpi_unregister_gsi);
#ifdef CONFIG_X86_LOCAL_APIC
static void __init acpi_set_irq_model_ioapic(void)
{
acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC;
__acpi_register_gsi = acpi_register_gsi_ioapic;
__acpi_unregister_gsi = acpi_unregister_gsi_ioapic;
acpi_ioapic = 1;
}
#endif
/*
* ACPI based hotplug support for CPU
*/
#ifdef CONFIG_ACPI_HOTPLUG_CPU
#include <acpi/processor.h>
static int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
{
#ifdef CONFIG_ACPI_NUMA
int nid;
nid = acpi_get_node(handle);
if (nid != NUMA_NO_NODE) {
set_apicid_to_node(physid, nid);
numa_set_node(cpu, nid);
}
#endif
x86/acpi: Set persistent cpuid <-> nodeid mapping when booting The whole patch-set aims at making cpuid <-> nodeid mapping persistent. So that, when node online/offline happens, cache based on cpuid <-> nodeid mapping such as wq_numa_possible_cpumask will not cause any problem. It contains 4 steps: 1. Enable apic registeration flow to handle both enabled and disabled cpus. 2. Introduce a new array storing all possible cpuid <-> apicid mapping. 3. Enable _MAT and MADT relative apis to return non-present or disabled cpus' apicid. 4. Establish all possible cpuid <-> nodeid mapping. This patch finishes step 4. This patch set the persistent cpuid <-> nodeid mapping for all enabled/disabled processors at boot time via an additional acpi namespace walk for processors. [ tglx: Remove the unneeded exports ] Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com> Signed-off-by: Tang Chen <tangchen@cn.fujitsu.com> Signed-off-by: Zhu Guihua <zhugh.fnst@cn.fujitsu.com> Signed-off-by: Dou Liyang <douly.fnst@cn.fujitsu.com> Acked-by: Ingo Molnar <mingo@kernel.org> Cc: mika.j.penttila@gmail.com Cc: len.brown@intel.com Cc: rafael@kernel.org Cc: rjw@rjwysocki.net Cc: yasu.isimatu@gmail.com Cc: linux-mm@kvack.org Cc: linux-acpi@vger.kernel.org Cc: isimatu.yasuaki@jp.fujitsu.com Cc: gongzhaogang@inspur.com Cc: tj@kernel.org Cc: izumi.taku@jp.fujitsu.com Cc: cl@linux.com Cc: chen.tang@easystack.cn Cc: akpm@linux-foundation.org Cc: kamezawa.hiroyu@jp.fujitsu.com Cc: lenb@kernel.org Link: http://lkml.kernel.org/r/1472114120-3281-6-git-send-email-douly.fnst@cn.fujitsu.com Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2016-08-25 15:35:18 +07:00
return 0;
}
int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id,
int *pcpu)
{
int cpu;
cpu = acpi_register_lapic(physid, acpi_id, ACPI_MADT_ENABLED);
if (cpu < 0) {
pr_info(PREFIX "Unable to map lapic to logical cpu number\n");
return cpu;
}
acpi_processor_set_pdc(handle);
acpi_map_cpu2node(handle, cpu, physid);
*pcpu = cpu;
return 0;
}
EXPORT_SYMBOL(acpi_map_cpu);
int acpi_unmap_cpu(int cpu)
{
cpu_hotplug: clear apicid to node when the cpu is hotremoved When a cpu is hotpluged, we call acpi_map_cpu2node() in _acpi_map_lsapic() to store the cpu's node and apicid's node. But we don't clear the cpu's node in acpi_unmap_lsapic() when this cpu is hotremoved. If the node is also hotremoved, we will get the following messages: kernel BUG at include/linux/gfp.h:329! invalid opcode: 0000 [#1] SMP Modules linked in: ebtable_nat ebtables ipt_MASQUERADE iptable_nat nf_nat xt_CHECKSUM iptable_mangle bridge stp llc sunrpc ipt_REJECT nf_conntrack_ipv4 nf_defrag_ipv4 iptable_filter ip_tables ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 xt_state nf_conntrack ip6table_filter ip6_tables binfmt_misc dm_mirror dm_region_hash dm_log dm_mod vhost_net macvtap macvlan tun uinput iTCO_wdt iTCO_vendor_support coretemp kvm_intel kvm crc32c_intel microcode pcspkr i2c_i801 i2c_core lpc_ich mfd_core ioatdma e1000e i7core_edac edac_core sg acpi_memhotplug igb dca sd_mod crc_t10dif megaraid_sas mptsas mptscsih mptbase scsi_transport_sas scsi_mod Pid: 3126, comm: init Not tainted 3.6.0-rc3-tangchen-hostbridge+ #13 FUJITSU-SV PRIMEQUEST 1800E/SB RIP: 0010:[<ffffffff811bc3fd>] [<ffffffff811bc3fd>] allocate_slab+0x28d/0x300 RSP: 0018:ffff88078a049cf8 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000001 RCX: 0000000000000000 RDX: 0000000000000001 RSI: 0000000000000001 RDI: 0000000000000246 RBP: ffff88078a049d38 R08: 00000000000040d0 R09: 0000000000000001 R10: 0000000000000000 R11: 0000000000000b5f R12: 00000000000052d0 R13: ffff8807c1417300 R14: 0000000000030038 R15: 0000000000000003 FS: 00007fa9b1b44700(0000) GS:ffff8807c3800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00007fa9b09acca0 CR3: 000000078b855000 CR4: 00000000000007e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process init (pid: 3126, threadinfo ffff88078a048000, task ffff8807bb6f2650) Call Trace: new_slab+0x30/0x1b0 __slab_alloc+0x358/0x4c0 kmem_cache_alloc_node_trace+0xb4/0x1e0 alloc_fair_sched_group+0xd0/0x1b0 sched_create_group+0x3e/0x110 sched_autogroup_create_attach+0x4d/0x180 sys_setsid+0xd4/0xf0 system_call_fastpath+0x16/0x1b Code: 89 c4 e9 73 fe ff ff 31 c0 89 de 48 c7 c7 45 de 9e 81 44 89 45 c8 e8 22 05 4b 00 85 db 44 8b 45 c8 0f 89 4f ff ff ff 0f 0b eb fe <0f> 0b 90 eb fd 0f 0b eb fe 89 de 48 c7 c7 45 de 9e 81 31 c0 44 RIP [<ffffffff811bc3fd>] allocate_slab+0x28d/0x300 RSP <ffff88078a049cf8> ---[ end trace adf84c90f3fea3e5 ]--- The reason is that the cpu's node is not NUMA_NO_NODE, we will call alloc_pages_exact_node() to alloc memory on the node, but the node is offlined. If the node is onlined, we still need cpu's node. For example: a task on the cpu is sleeped when the cpu is hotremoved. We will choose another cpu to run this task when it is waked up. If we know the cpu's node, we will choose the cpu on the same node first. So we should clear cpu-to-node mapping when the node is offlined. This patch only clears apicid-to-node mapping when the cpu is hotremoved. [akpm@linux-foundation.org: fix section error] Signed-off-by: Wen Congyang <wency@cn.fujitsu.com> Signed-off-by: Tang Chen <tangchen@cn.fujitsu.com> Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com> Cc: David Rientjes <rientjes@google.com> Cc: Jiang Liu <liuj97@gmail.com> Cc: Minchan Kim <minchan.kim@gmail.com> Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Cc: Mel Gorman <mel@csn.ul.ie> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Ingo Molnar <mingo@redhat.com> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Peter Zijlstra <peterz@infradead.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2013-02-23 07:33:24 +07:00
#ifdef CONFIG_ACPI_NUMA
set_apicid_to_node(per_cpu(x86_cpu_to_apicid, cpu), NUMA_NO_NODE);
#endif
per_cpu(x86_cpu_to_apicid, cpu) = -1;
set_cpu_present(cpu, false);
num_processors--;
return (0);
}
EXPORT_SYMBOL(acpi_unmap_cpu);
#endif /* CONFIG_ACPI_HOTPLUG_CPU */
int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base)
{
int ret = -ENOSYS;
#ifdef CONFIG_ACPI_HOTPLUG_IOAPIC
int ioapic_id;
u64 addr;
struct ioapic_domain_cfg cfg = {
.type = IOAPIC_DOMAIN_DYNAMIC,
.ops = &mp_ioapic_irqdomain_ops,
};
ioapic_id = acpi_get_ioapic_id(handle, gsi_base, &addr);
if (ioapic_id < 0) {
unsigned long long uid;
acpi_status status;
status = acpi_evaluate_integer(handle, METHOD_NAME__UID,
NULL, &uid);
if (ACPI_FAILURE(status)) {
acpi_handle_warn(handle, "failed to get IOAPIC ID.\n");
return -EINVAL;
}
ioapic_id = (int)uid;
}
mutex_lock(&acpi_ioapic_lock);
ret = mp_register_ioapic(ioapic_id, phys_addr, gsi_base, &cfg);
mutex_unlock(&acpi_ioapic_lock);
#endif
return ret;
}
EXPORT_SYMBOL(acpi_register_ioapic);
int acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base)
{
int ret = -ENOSYS;
#ifdef CONFIG_ACPI_HOTPLUG_IOAPIC
mutex_lock(&acpi_ioapic_lock);
ret = mp_unregister_ioapic(gsi_base);
mutex_unlock(&acpi_ioapic_lock);
#endif
return ret;
}
EXPORT_SYMBOL(acpi_unregister_ioapic);
/**
* acpi_ioapic_registered - Check whether IOAPIC assoicatied with @gsi_base
* has been registered
* @handle: ACPI handle of the IOAPIC deivce
* @gsi_base: GSI base associated with the IOAPIC
*
* Assume caller holds some type of lock to serialize acpi_ioapic_registered()
* with acpi_register_ioapic()/acpi_unregister_ioapic().
*/
int acpi_ioapic_registered(acpi_handle handle, u32 gsi_base)
{
int ret = 0;
#ifdef CONFIG_ACPI_HOTPLUG_IOAPIC
mutex_lock(&acpi_ioapic_lock);
ret = mp_ioapic_registered(gsi_base);
mutex_unlock(&acpi_ioapic_lock);
#endif
return ret;
}
static int __init acpi_parse_sbf(struct acpi_table_header *table)
{
struct acpi_table_boot *sb = (struct acpi_table_boot *)table;
sbf_port = sb->cmos_index; /* Save CMOS port */
return 0;
}
#ifdef CONFIG_HPET_TIMER
#include <asm/hpet.h>
static struct resource *hpet_res __initdata;
static int __init acpi_parse_hpet(struct acpi_table_header *table)
{
struct acpi_table_hpet *hpet_tbl = (struct acpi_table_hpet *)table;
if (hpet_tbl->address.space_id != ACPI_SPACE_MEM) {
printk(KERN_WARNING PREFIX "HPET timers must be located in "
"memory.\n");
return -1;
}
hpet_address = hpet_tbl->address.address;
hpet_blockid = hpet_tbl->sequence;
/*
* Some broken BIOSes advertise HPET at 0x0. We really do not
* want to allocate a resource there.
*/
if (!hpet_address) {
printk(KERN_WARNING PREFIX
"HPET id: %#x base: %#lx is invalid\n",
hpet_tbl->id, hpet_address);
return 0;
}
#ifdef CONFIG_X86_64
/*
* Some even more broken BIOSes advertise HPET at
* 0xfed0000000000000 instead of 0xfed00000. Fix it up and add
* some noise:
*/
if (hpet_address == 0xfed0000000000000UL) {
if (!hpet_force_user) {
printk(KERN_WARNING PREFIX "HPET id: %#x "
"base: 0xfed0000000000000 is bogus\n "
"try hpet=force on the kernel command line to "
"fix it up to 0xfed00000.\n", hpet_tbl->id);
hpet_address = 0;
return 0;
}
printk(KERN_WARNING PREFIX
"HPET id: %#x base: 0xfed0000000000000 fixed up "
"to 0xfed00000.\n", hpet_tbl->id);
hpet_address >>= 32;
}
#endif
printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
hpet_tbl->id, hpet_address);
/*
* Allocate and initialize the HPET firmware resource for adding into
* the resource tree during the lateinit timeframe.
*/
#define HPET_RESOURCE_NAME_SIZE 9
hpet_res = alloc_bootmem(sizeof(*hpet_res) + HPET_RESOURCE_NAME_SIZE);
hpet_res->name = (void *)&hpet_res[1];
hpet_res->flags = IORESOURCE_MEM;
snprintf((char *)hpet_res->name, HPET_RESOURCE_NAME_SIZE, "HPET %u",
hpet_tbl->sequence);
hpet_res->start = hpet_address;
hpet_res->end = hpet_address + (1 * 1024) - 1;
return 0;
}
/*
* hpet_insert_resource inserts the HPET resources used into the resource
* tree.
*/
static __init int hpet_insert_resource(void)
{
if (!hpet_res)
return 1;
return insert_resource(&iomem_resource, hpet_res);
}
late_initcall(hpet_insert_resource);
#else
#define acpi_parse_hpet NULL
#endif
static int __init acpi_parse_fadt(struct acpi_table_header *table)
{
x86/ACPI: Parse ACPI_FADT_LEGACY_DEVICES ACPI 5.2.9.3 IA-PC Boot Architecture flag ACPI_FADT_LEGACY_DEVICES can be used to determine if a system has legacy devices LPC or ISA devices. The x86 platform already has a struct which lists known associated legacy devices, we start off careful only by disabling root devices we should not regress with. The struct and device list can be expanded with time to cover more root legacy components. Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org> Cc: Andy Lutomirski <luto@amacapital.net> Cc: Borislav Petkov <bp@alien8.de> Cc: Brian Gerst <brgerst@gmail.com> Cc: Denys Vlasenko <dvlasenk@redhat.com> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: andrew.cooper3@citrix.com Cc: andriy.shevchenko@linux.intel.com Cc: bigeasy@linutronix.de Cc: boris.ostrovsky@oracle.com Cc: david.vrabel@citrix.com Cc: ffainelli@freebox.fr Cc: george.dunlap@citrix.com Cc: glin@suse.com Cc: jgross@suse.com Cc: jlee@suse.com Cc: josh@joshtriplett.org Cc: julien.grall@linaro.org Cc: konrad.wilk@oracle.com Cc: kozerkov@parallels.com Cc: lenb@kernel.org Cc: lguest@lists.ozlabs.org Cc: linux-acpi@vger.kernel.org Cc: lv.zheng@intel.com Cc: matt@codeblueprint.co.uk Cc: mbizon@freebox.fr Cc: rjw@rjwysocki.net Cc: robert.moore@intel.com Cc: rusty@rustcorp.com.au Cc: tiwai@suse.de Cc: toshi.kani@hp.com Cc: xen-devel@lists.xensource.com Link: http://lkml.kernel.org/r/1460592286-300-13-git-send-email-mcgrof@kernel.org Signed-off-by: Ingo Molnar <mingo@kernel.org>
2016-04-14 07:04:42 +07:00
if (!(acpi_gbl_FADT.boot_flags & ACPI_FADT_LEGACY_DEVICES)) {
pr_debug("ACPI: no legacy devices present\n");
x86_platform.legacy.devices.pnpbios = 0;
}
if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID &&
!(acpi_gbl_FADT.boot_flags & ACPI_FADT_8042) &&
x86_platform.legacy.i8042 != X86_LEGACY_I8042_PLATFORM_ABSENT) {
pr_debug("ACPI: i8042 controller is absent\n");
x86_platform.legacy.i8042 = X86_LEGACY_I8042_FIRMWARE_ABSENT;
}
x86/ACPI: Move ACPI_FADT_NO_CMOS_RTC check to ACPI boot code This moves the ACPI specific check into the ACPI boot code, it also takes advantage of the x86_platform.legacy.rtc which is checked for already on the RTC initialization code. This lets us remove the nasty #ifdefery and consolidate the checks to use only one toggle to disable the RTC init code. The works as RTC is initialized by device_initcall(add_rtc_cmos), this will run late in boot on start_kernel() during rest_init(), acpi_parse_fadt() gets called earlier during setup_arch(). Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org> Cc: Andy Lutomirski <luto@amacapital.net> Cc: Borislav Petkov <bp@alien8.de> Cc: Brian Gerst <brgerst@gmail.com> Cc: Denys Vlasenko <dvlasenk@redhat.com> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: andrew.cooper3@citrix.com Cc: andriy.shevchenko@linux.intel.com Cc: bigeasy@linutronix.de Cc: boris.ostrovsky@oracle.com Cc: david.vrabel@citrix.com Cc: ffainelli@freebox.fr Cc: george.dunlap@citrix.com Cc: glin@suse.com Cc: jgross@suse.com Cc: jlee@suse.com Cc: josh@joshtriplett.org Cc: julien.grall@linaro.org Cc: konrad.wilk@oracle.com Cc: kozerkov@parallels.com Cc: lenb@kernel.org Cc: lguest@lists.ozlabs.org Cc: linux-acpi@vger.kernel.org Cc: lv.zheng@intel.com Cc: matt@codeblueprint.co.uk Cc: mbizon@freebox.fr Cc: rjw@rjwysocki.net Cc: robert.moore@intel.com Cc: rusty@rustcorp.com.au Cc: tiwai@suse.de Cc: toshi.kani@hp.com Cc: xen-devel@lists.xensource.com Link: http://lkml.kernel.org/r/1460592286-300-6-git-send-email-mcgrof@kernel.org Signed-off-by: Ingo Molnar <mingo@kernel.org>
2016-04-14 07:04:35 +07:00
if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_CMOS_RTC) {
pr_debug("ACPI: not registering RTC platform device\n");
x86_platform.legacy.rtc = 0;
}
if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_VGA) {
pr_debug("ACPI: probing for VGA not safe\n");
x86_platform.legacy.no_vga = 1;
}
#ifdef CONFIG_X86_PM_TIMER
/* detect the location of the ACPI PM Timer */
if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID) {
/* FADT rev. 2 */
if (acpi_gbl_FADT.xpm_timer_block.space_id !=
ACPI_ADR_SPACE_SYSTEM_IO)
return 0;
pmtmr_ioport = acpi_gbl_FADT.xpm_timer_block.address;
/*
* "X" fields are optional extensions to the original V1.0
* fields, so we must selectively expand V1.0 fields if the
* corresponding X field is zero.
*/
if (!pmtmr_ioport)
pmtmr_ioport = acpi_gbl_FADT.pm_timer_block;
} else {
/* FADT rev. 1 */
pmtmr_ioport = acpi_gbl_FADT.pm_timer_block;
}
if (pmtmr_ioport)
printk(KERN_INFO PREFIX "PM-Timer IO Port: %#x\n",
pmtmr_ioport);
#endif
return 0;
}
#ifdef CONFIG_X86_LOCAL_APIC
/*
* Parse LAPIC entries in MADT
* returns 0 on success, < 0 on error
*/
static int __init early_acpi_parse_madt_lapic_addr_ovr(void)
{
int count;
if (!boot_cpu_has(X86_FEATURE_APIC))
return -ENODEV;
/*
* Note that the LAPIC address is obtained from the MADT (32-bit value)
* and (optionally) overridden by a LAPIC_ADDR_OVR entry (64-bit value).
*/
count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE,
acpi_parse_lapic_addr_ovr, 0);
if (count < 0) {
printk(KERN_ERR PREFIX
"Error parsing LAPIC address override entry\n");
return count;
}
register_lapic_address(acpi_lapic_addr);
return count;
}
static int __init acpi_parse_madt_lapic_entries(void)
{
int count;
int x2count = 0;
x86, ACPI: Handle apic/x2apic entries in MADT in correct order ACPI specifies the following rules when listing APIC IDs: (1) Boot processor is listed first (2) For multi-threaded processors, BIOS should list the first logical processor of each of the individual multi-threaded processors in MADT before listing any of the second logical processors. (3) APIC IDs < 0xFF should be listed in APIC subtable, APIC IDs >= 0xFF should be listed in X2APIC subtable Because of above, when there's more than 0xFF logical CPUs, BIOS interleaves APIC/X2APIC subtables. Assuming, there's 72 cores, 72 hyper-threads each, 288 CPUs total, listing is like this: APIC (0,4,8, .., 252) X2APIC (258,260,264, .. 284) APIC (1,5,9,...,253) X2APIC (259,261,265,...,285) APIC (2,6,10,...,254) X2APIC (260,262,266,..,286) APIC (3,7,11,...,251) X2APIC (255,261,262,266,..,287) Now, before this patch, due to how ACPI MADT subtables were parsed (BSP then X2APIC then APIC), kernel enumerated CPUs in reverted order (i.e. high APIC IDs were getting low logical IDs, and low APIC IDs were getting high logical IDs). This is wrong for the following reasons: () it's hard to predict how cores and threads are enumerated () when it's hard to predict, s/w threads cannot be properly affinitized causing significant performance impact due to e.g. inproper cache sharing () enumeration is inconsistent with how threads are enumerated on other Intel Xeon processors So, order in which MADT APIC/X2APIC handlers are passed is reverse and both handlers are passed to be called during same MADT table to walk to achieve correct CPU enumeration. In scenario when someone boots kernel with options 'maxcpus=72 nox2apic', in result less cores may be booted, since some of the CPUs the kernel will try to use will have APIC ID >= 0xFF. In such case, one should not pass 'nox2apic'. Disclimer: code parsing MADT APIC/X2APIC has not been touched since 2009, when X2APIC support was initially added. I do not know why MADT parsing code was added in the reversed order in the first place. I guess it didn't matter at that time since nobody cared about cores with APIC IDs >= 0xFF, right? This patch is based on work of "Yinghai Lu <yinghai@kernel.org>" previously published at https://lkml.org/lkml/2013/1/21/563 Here's the explanation why parsing interface needs to be changed and why simpler approach will not work https://lkml.org/lkml/2015/9/7/285 Signed-off-by: Lukasz Anaczkowski <lukasz.anaczkowski@intel.com> Acked-by: Thomas Gleixner <tglx@linutronix.de> (commit message) Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2015-09-09 20:47:29 +07:00
int ret;
struct acpi_subtable_proc madt_proc[2];
if (!boot_cpu_has(X86_FEATURE_APIC))
return -ENODEV;
count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_SAPIC,
acpi_parse_sapic, MAX_LOCAL_APIC);
if (!count) {
x86, ACPI: Handle apic/x2apic entries in MADT in correct order ACPI specifies the following rules when listing APIC IDs: (1) Boot processor is listed first (2) For multi-threaded processors, BIOS should list the first logical processor of each of the individual multi-threaded processors in MADT before listing any of the second logical processors. (3) APIC IDs < 0xFF should be listed in APIC subtable, APIC IDs >= 0xFF should be listed in X2APIC subtable Because of above, when there's more than 0xFF logical CPUs, BIOS interleaves APIC/X2APIC subtables. Assuming, there's 72 cores, 72 hyper-threads each, 288 CPUs total, listing is like this: APIC (0,4,8, .., 252) X2APIC (258,260,264, .. 284) APIC (1,5,9,...,253) X2APIC (259,261,265,...,285) APIC (2,6,10,...,254) X2APIC (260,262,266,..,286) APIC (3,7,11,...,251) X2APIC (255,261,262,266,..,287) Now, before this patch, due to how ACPI MADT subtables were parsed (BSP then X2APIC then APIC), kernel enumerated CPUs in reverted order (i.e. high APIC IDs were getting low logical IDs, and low APIC IDs were getting high logical IDs). This is wrong for the following reasons: () it's hard to predict how cores and threads are enumerated () when it's hard to predict, s/w threads cannot be properly affinitized causing significant performance impact due to e.g. inproper cache sharing () enumeration is inconsistent with how threads are enumerated on other Intel Xeon processors So, order in which MADT APIC/X2APIC handlers are passed is reverse and both handlers are passed to be called during same MADT table to walk to achieve correct CPU enumeration. In scenario when someone boots kernel with options 'maxcpus=72 nox2apic', in result less cores may be booted, since some of the CPUs the kernel will try to use will have APIC ID >= 0xFF. In such case, one should not pass 'nox2apic'. Disclimer: code parsing MADT APIC/X2APIC has not been touched since 2009, when X2APIC support was initially added. I do not know why MADT parsing code was added in the reversed order in the first place. I guess it didn't matter at that time since nobody cared about cores with APIC IDs >= 0xFF, right? This patch is based on work of "Yinghai Lu <yinghai@kernel.org>" previously published at https://lkml.org/lkml/2013/1/21/563 Here's the explanation why parsing interface needs to be changed and why simpler approach will not work https://lkml.org/lkml/2015/9/7/285 Signed-off-by: Lukasz Anaczkowski <lukasz.anaczkowski@intel.com> Acked-by: Thomas Gleixner <tglx@linutronix.de> (commit message) Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2015-09-09 20:47:29 +07:00
memset(madt_proc, 0, sizeof(madt_proc));
madt_proc[0].id = ACPI_MADT_TYPE_LOCAL_APIC;
madt_proc[0].handler = acpi_parse_lapic;
madt_proc[1].id = ACPI_MADT_TYPE_LOCAL_X2APIC;
madt_proc[1].handler = acpi_parse_x2apic;
ret = acpi_table_parse_entries_array(ACPI_SIG_MADT,
sizeof(struct acpi_table_madt),
madt_proc, ARRAY_SIZE(madt_proc), MAX_LOCAL_APIC);
if (ret < 0) {
printk(KERN_ERR PREFIX
"Error parsing LAPIC/X2APIC entries\n");
return ret;
}
count = madt_proc[0].count;
x2count = madt_proc[1].count;
}
if (!count && !x2count) {
printk(KERN_ERR PREFIX "No LAPIC entries present\n");
/* TBD: Cleanup to allow fallback to MPS */
return -ENODEV;
} else if (count < 0 || x2count < 0) {
printk(KERN_ERR PREFIX "Error parsing LAPIC entry\n");
/* TBD: Cleanup to allow fallback to MPS */
return count;
}
x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC_NMI,
acpi_parse_x2apic_nmi, 0);
count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_NMI,
acpi_parse_lapic_nmi, 0);
if (count < 0 || x2count < 0) {
printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n");
/* TBD: Cleanup to allow fallback to MPS */
return count;
}
return 0;
}
#endif /* CONFIG_X86_LOCAL_APIC */
#ifdef CONFIG_X86_IO_APIC
x86, mpparse: Simplify arch/x86/include/asm/mpspec.h Simplify arch/x86/include/asm/mpspec.h by 1) Change max_physical_apicid to static as it's only used in apic.c. 2) Kill declaration of mpc_default_type, it's never defined. 3) Delete default_acpi_madt_oem_check(), it has already been declared in apic.h. 4) Make default_acpi_madt_oem_check() depends on CONFIG_X86_LOCAL_APIC instead of CONFIG_X86_64 to support i386. 5) Change mp_override_legacy_irq(), mp_config_acpi_legacy_irqs() and mp_register_gsi() as static because they are only used in acpi/boot.c. Signed-off-by: Jiang Liu <jiang.liu@linux.intel.com> Acked-by: David Rientjes <rientjes@google.com> Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Cc: Tony Luck <tony.luck@intel.com> Cc: Joerg Roedel <joro@8bytes.org> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: H. Peter Anvin <hpa@linux.intel.com> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Grant Likely <grant.likely@linaro.org> Cc: Rafael J. Wysocki <rjw@rjwysocki.net> Cc: Bjorn Helgaas <bhelgaas@google.com> Cc: Randy Dunlap <rdunlap@infradead.org> Cc: Yinghai Lu <yinghai@kernel.org> Cc: Len Brown <len.brown@intel.com> Cc: Pavel Machek <pavel@ucw.cz> Cc: Seiji Aguchi <seiji.aguchi@hds.com> Cc: HATAYAMA Daisuke <d.hatayama@jp.fujitsu.com> Cc: Paul Gortmaker <paul.gortmaker@windriver.com> Cc: Richard Weinberger <richard@nod.at> Cc: Andi Kleen <ak@linux.intel.com> Link: http://lkml.kernel.org/r/1402302011-23642-4-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2014-06-09 15:19:32 +07:00
static void __init mp_config_acpi_legacy_irqs(void)
{
int i;
struct mpc_intsrc mp_irq;
#ifdef CONFIG_EISA
/*
* Fabricate the legacy ISA bus (bus #31).
*/
mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA;
#endif
set_bit(MP_ISA_BUS, mp_bus_not_pci);
pr_debug("Bus #%d is ISA (nIRQs: %d)\n", MP_ISA_BUS, nr_legacy_irqs());
/*
* Use the default configuration for the IRQs 0-15. Unless
* overridden by (MADT) interrupt source override entries.
*/
for (i = 0; i < nr_legacy_irqs(); i++) {
int ioapic, pin;
unsigned int dstapic;
int idx;
u32 gsi;
/* Locate the gsi that irq i maps to. */
if (acpi_isa_irq_to_gsi(i, &gsi))
continue;
/*
* Locate the IOAPIC that manages the ISA IRQ.
*/
ioapic = mp_find_ioapic(gsi);
if (ioapic < 0)
continue;
pin = mp_find_ioapic_pin(ioapic, gsi);
dstapic = mpc_ioapic_id(ioapic);
for (idx = 0; idx < mp_irq_entries; idx++) {
struct mpc_intsrc *irq = mp_irqs + idx;
/* Do we already have a mapping for this ISA IRQ? */
if (irq->srcbus == MP_ISA_BUS && irq->srcbusirq == i)
break;
/* Do we already have a mapping for this IOAPIC pin */
if (irq->dstapic == dstapic && irq->dstirq == pin)
break;
}
if (idx != mp_irq_entries) {
printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i);
continue; /* IRQ already used */
}
mp_irq.type = MP_INTSRC;
mp_irq.irqflag = 0; /* Conforming */
mp_irq.srcbus = MP_ISA_BUS;
mp_irq.dstapic = dstapic;
mp_irq.irqtype = mp_INT;
mp_irq.srcbusirq = i; /* Identity mapped */
mp_irq.dstirq = pin;
mp_save_irq(&mp_irq);
}
}
/*
* Parse IOAPIC related entries in MADT
* returns 0 on success, < 0 on error
*/
static int __init acpi_parse_madt_ioapic_entries(void)
{
int count;
/*
* ACPI interpreter is required to complete interrupt setup,
* so if it is off, don't enumerate the io-apics with ACPI.
* If MPS is present, it will handle them,
* otherwise the system will stay in PIC mode
*/
if (acpi_disabled || acpi_noirq)
return -ENODEV;
if (!boot_cpu_has(X86_FEATURE_APIC))
return -ENODEV;
/*
* if "noapic" boot option, don't look for IO-APICs
*/
if (skip_ioapic_setup) {
printk(KERN_INFO PREFIX "Skipping IOAPIC probe "
"due to 'noapic' option.\n");
return -ENODEV;
}
count = acpi_table_parse_madt(ACPI_MADT_TYPE_IO_APIC, acpi_parse_ioapic,
MAX_IO_APICS);
if (!count) {
printk(KERN_ERR PREFIX "No IOAPIC entries present\n");
return -ENODEV;
} else if (count < 0) {
printk(KERN_ERR PREFIX "Error parsing IOAPIC entry\n");
return count;
}
count = acpi_table_parse_madt(ACPI_MADT_TYPE_INTERRUPT_OVERRIDE,
acpi_parse_int_src_ovr, nr_irqs);
if (count < 0) {
printk(KERN_ERR PREFIX
"Error parsing interrupt source overrides entry\n");
/* TBD: Cleanup to allow fallback to MPS */
return count;
}
/*
* If BIOS did not supply an INT_SRC_OVR for the SCI
* pretend we got one so we can set the SCI flags.
* But ignore setting up SCI on hardware reduced platforms.
*/
if (acpi_sci_override_gsi == INVALID_ACPI_IRQ && !acpi_gbl_reduced_hardware)
acpi_sci_ioapic_setup(acpi_gbl_FADT.sci_interrupt, 0, 0,
acpi_gbl_FADT.sci_interrupt);
/* Fill in identity legacy mappings where no override */
mp_config_acpi_legacy_irqs();
count = acpi_table_parse_madt(ACPI_MADT_TYPE_NMI_SOURCE,
acpi_parse_nmi_src, nr_irqs);
if (count < 0) {
printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n");
/* TBD: Cleanup to allow fallback to MPS */
return count;
}
return 0;
}
#else
static inline int acpi_parse_madt_ioapic_entries(void)
{
return -1;
}
#endif /* !CONFIG_X86_IO_APIC */
static void __init early_acpi_process_madt(void)
{
#ifdef CONFIG_X86_LOCAL_APIC
int error;
if (!acpi_table_parse(ACPI_SIG_MADT, acpi_parse_madt)) {
/*
* Parse MADT LAPIC entries
*/
error = early_acpi_parse_madt_lapic_addr_ovr();
if (!error) {
acpi_lapic = 1;
smp_found_config = 1;
}
if (error == -EINVAL) {
/*
* Dell Precision Workstation 410, 610 come here.
*/
printk(KERN_ERR PREFIX
"Invalid BIOS MADT, disabling ACPI\n");
disable_acpi();
}
}
#endif
}
static void __init acpi_process_madt(void)
{
#ifdef CONFIG_X86_LOCAL_APIC
int error;
if (!acpi_table_parse(ACPI_SIG_MADT, acpi_parse_madt)) {
/*
* Parse MADT LAPIC entries
*/
error = acpi_parse_madt_lapic_entries();
if (!error) {
acpi_lapic = 1;
/*
* Parse MADT IO-APIC entries
*/
mutex_lock(&acpi_ioapic_lock);
error = acpi_parse_madt_ioapic_entries();
mutex_unlock(&acpi_ioapic_lock);
if (!error) {
acpi_set_irq_model_ioapic();
smp_found_config = 1;
}
}
if (error == -EINVAL) {
/*
* Dell Precision Workstation 410, 610 come here.
*/
printk(KERN_ERR PREFIX
"Invalid BIOS MADT, disabling ACPI\n");
disable_acpi();
}
} else {
/*
* ACPI found no MADT, and so ACPI wants UP PIC mode.
* In the event an MPS table was found, forget it.
* Boot with "acpi=off" to use MPS on such a system.
*/
if (smp_found_config) {
printk(KERN_WARNING PREFIX
"No APIC-table, disabling MPS\n");
smp_found_config = 0;
}
}
/*
* ACPI supports both logical (e.g. Hyper-Threading) and physical
* processors, where MPS only supports physical.
*/
if (acpi_lapic && acpi_ioapic)
printk(KERN_INFO "Using ACPI (MADT) for SMP configuration "
"information\n");
else if (acpi_lapic)
printk(KERN_INFO "Using ACPI for processor (LAPIC) "
"configuration information\n");
#endif
return;
}
static int __init disable_acpi_irq(const struct dmi_system_id *d)
{
if (!acpi_force) {
printk(KERN_NOTICE "%s detected: force use of acpi=noirq\n",
d->ident);
acpi_noirq_set();
}
return 0;
}
static int __init disable_acpi_pci(const struct dmi_system_id *d)
{
if (!acpi_force) {
printk(KERN_NOTICE "%s detected: force use of pci=noacpi\n",
d->ident);
acpi_disable_pci();
}
return 0;
}
static int __init dmi_disable_acpi(const struct dmi_system_id *d)
{
if (!acpi_force) {
printk(KERN_NOTICE "%s detected: acpi off\n", d->ident);
disable_acpi();
} else {
printk(KERN_NOTICE
"Warning: DMI blacklist says broken, but acpi forced\n");
}
return 0;
}
x86: fix C1E && nx6325 stability problem The problems are that, with the ACPI vs timer overring issue _fixed_, after using the box for some time (between several seconds and 1 hour, at random) processes get very high CPU loads (once I've got X using 107% of the CPU, for example) and the system becomes unresponsive, as though there were interrupts lost or something similar. Andreas Herrman reproduced similar problems: > Ok, now I've reproduced the stability problem. > - Using tip/master, > - reverting e38502eb8aa82314d5ab0eba45f50e6790dadd88 and > - applying your patch from this posting > http://marc.info/?l=linux-kernel&m=121539354224562&w=4 > > Starting X, firefox, gimp, tuxpaint and doing some drawing in tuxpaint > results in a slow system. Drawing is almost not possible anymore -- > Selections of new colors, cursors etc. is performed with huge delay > if it's performed at all. > > BTW, the code sets up timer IRQ as Virtual Wire IRQ: > > Jul 8 14:57:58 kodscha IO-APIC (apicid-pin) 2-22, 2-23 not connected. > Jul 8 14:57:58 kodscha ..TIMER: vector=0x30 apic1=0 pin1=2 apic2=-1 pin2=-1 > Jul 8 14:57:58 kodscha ...trying to set up timer as Virtual Wire IRQ... works. > > and both INT0 and INT2 of IOAPIC are masked: > > Jul 8 14:57:58 kodscha NR Dst Mask Trig IRR Pol Stat Dmod Deli Vect: > Jul 8 14:57:58 kodscha 00 000 1 0 0 0 0 0 0 00 > Jul 8 14:57:58 kodscha 01 003 0 0 0 0 0 1 1 31 > Jul 8 14:57:58 kodscha 02 003 1 0 0 0 0 0 0 30 > > I've also seen strange CPU utilization -- with syslog-ng: > > top - 15:33:06 up 35 min, 4 users, load average: 1.70, 0.68, 0.37 > Tasks: 64 total, 4 running, 60 sleeping, 0 stopped, 0 zombie > Cpu0 : 0.0%us,100.0%sy, 0.0%ni, 0.0%id, 0.0%wa, 0.0%hi, 0.0%si, 0.0%st > Cpu1 : 6.4%us, 87.2%sy, 0.0%ni, 5.8%id, 0.0%wa, 0.6%hi, 0.0%si, 0.0%st > Mem: 895384k total, 283568k used, 611816k free, 35492k buffers > Swap: 1959920k total, 0k used, 1959920k free, 163044k cached > > PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND > 4632 root 20 0 17216 800 580 S 104 0.1 0:34.22 syslog-ng > 28505 root 20 0 205m 11m 4024 S 6 1.3 0:21.16 X > 28518 root 20 0 56292 5652 4492 S 1 0.6 0:01.80 fluxbox > 1 root 20 0 3724 608 508 S 0 0.1 0:00.36 init > > So far I have no clue why C1E-idle in conjunction with virtual wire > mode causes this strange behaviour. > > ... and I start to think about the root cause of all this. > > I've performed similar tests under X with the IRQ0/INT0 configuration and > I did not see above symptoms. So lets fall back to the IRQ0/INT0 configuration on this box. This basically restores the dont-use-the-lapic-timer exception mechanism that was unconditional on this box prior commit 8750bf5 ("x86: add C1E aware idle function"). Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-07-08 21:12:26 +07:00
/*
* Force ignoring BIOS IRQ0 override
x86: fix C1E && nx6325 stability problem The problems are that, with the ACPI vs timer overring issue _fixed_, after using the box for some time (between several seconds and 1 hour, at random) processes get very high CPU loads (once I've got X using 107% of the CPU, for example) and the system becomes unresponsive, as though there were interrupts lost or something similar. Andreas Herrman reproduced similar problems: > Ok, now I've reproduced the stability problem. > - Using tip/master, > - reverting e38502eb8aa82314d5ab0eba45f50e6790dadd88 and > - applying your patch from this posting > http://marc.info/?l=linux-kernel&m=121539354224562&w=4 > > Starting X, firefox, gimp, tuxpaint and doing some drawing in tuxpaint > results in a slow system. Drawing is almost not possible anymore -- > Selections of new colors, cursors etc. is performed with huge delay > if it's performed at all. > > BTW, the code sets up timer IRQ as Virtual Wire IRQ: > > Jul 8 14:57:58 kodscha IO-APIC (apicid-pin) 2-22, 2-23 not connected. > Jul 8 14:57:58 kodscha ..TIMER: vector=0x30 apic1=0 pin1=2 apic2=-1 pin2=-1 > Jul 8 14:57:58 kodscha ...trying to set up timer as Virtual Wire IRQ... works. > > and both INT0 and INT2 of IOAPIC are masked: > > Jul 8 14:57:58 kodscha NR Dst Mask Trig IRR Pol Stat Dmod Deli Vect: > Jul 8 14:57:58 kodscha 00 000 1 0 0 0 0 0 0 00 > Jul 8 14:57:58 kodscha 01 003 0 0 0 0 0 1 1 31 > Jul 8 14:57:58 kodscha 02 003 1 0 0 0 0 0 0 30 > > I've also seen strange CPU utilization -- with syslog-ng: > > top - 15:33:06 up 35 min, 4 users, load average: 1.70, 0.68, 0.37 > Tasks: 64 total, 4 running, 60 sleeping, 0 stopped, 0 zombie > Cpu0 : 0.0%us,100.0%sy, 0.0%ni, 0.0%id, 0.0%wa, 0.0%hi, 0.0%si, 0.0%st > Cpu1 : 6.4%us, 87.2%sy, 0.0%ni, 5.8%id, 0.0%wa, 0.6%hi, 0.0%si, 0.0%st > Mem: 895384k total, 283568k used, 611816k free, 35492k buffers > Swap: 1959920k total, 0k used, 1959920k free, 163044k cached > > PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND > 4632 root 20 0 17216 800 580 S 104 0.1 0:34.22 syslog-ng > 28505 root 20 0 205m 11m 4024 S 6 1.3 0:21.16 X > 28518 root 20 0 56292 5652 4492 S 1 0.6 0:01.80 fluxbox > 1 root 20 0 3724 608 508 S 0 0.1 0:00.36 init > > So far I have no clue why C1E-idle in conjunction with virtual wire > mode causes this strange behaviour. > > ... and I start to think about the root cause of all this. > > I've performed similar tests under X with the IRQ0/INT0 configuration and > I did not see above symptoms. So lets fall back to the IRQ0/INT0 configuration on this box. This basically restores the dont-use-the-lapic-timer exception mechanism that was unconditional on this box prior commit 8750bf5 ("x86: add C1E aware idle function"). Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-07-08 21:12:26 +07:00
*/
static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d)
{
if (!acpi_skip_timer_override) {
pr_notice("%s detected: Ignoring BIOS IRQ0 override\n",
d->ident);
acpi_skip_timer_override = 1;
}
x86: fix C1E && nx6325 stability problem The problems are that, with the ACPI vs timer overring issue _fixed_, after using the box for some time (between several seconds and 1 hour, at random) processes get very high CPU loads (once I've got X using 107% of the CPU, for example) and the system becomes unresponsive, as though there were interrupts lost or something similar. Andreas Herrman reproduced similar problems: > Ok, now I've reproduced the stability problem. > - Using tip/master, > - reverting e38502eb8aa82314d5ab0eba45f50e6790dadd88 and > - applying your patch from this posting > http://marc.info/?l=linux-kernel&m=121539354224562&w=4 > > Starting X, firefox, gimp, tuxpaint and doing some drawing in tuxpaint > results in a slow system. Drawing is almost not possible anymore -- > Selections of new colors, cursors etc. is performed with huge delay > if it's performed at all. > > BTW, the code sets up timer IRQ as Virtual Wire IRQ: > > Jul 8 14:57:58 kodscha IO-APIC (apicid-pin) 2-22, 2-23 not connected. > Jul 8 14:57:58 kodscha ..TIMER: vector=0x30 apic1=0 pin1=2 apic2=-1 pin2=-1 > Jul 8 14:57:58 kodscha ...trying to set up timer as Virtual Wire IRQ... works. > > and both INT0 and INT2 of IOAPIC are masked: > > Jul 8 14:57:58 kodscha NR Dst Mask Trig IRR Pol Stat Dmod Deli Vect: > Jul 8 14:57:58 kodscha 00 000 1 0 0 0 0 0 0 00 > Jul 8 14:57:58 kodscha 01 003 0 0 0 0 0 1 1 31 > Jul 8 14:57:58 kodscha 02 003 1 0 0 0 0 0 0 30 > > I've also seen strange CPU utilization -- with syslog-ng: > > top - 15:33:06 up 35 min, 4 users, load average: 1.70, 0.68, 0.37 > Tasks: 64 total, 4 running, 60 sleeping, 0 stopped, 0 zombie > Cpu0 : 0.0%us,100.0%sy, 0.0%ni, 0.0%id, 0.0%wa, 0.0%hi, 0.0%si, 0.0%st > Cpu1 : 6.4%us, 87.2%sy, 0.0%ni, 5.8%id, 0.0%wa, 0.6%hi, 0.0%si, 0.0%st > Mem: 895384k total, 283568k used, 611816k free, 35492k buffers > Swap: 1959920k total, 0k used, 1959920k free, 163044k cached > > PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND > 4632 root 20 0 17216 800 580 S 104 0.1 0:34.22 syslog-ng > 28505 root 20 0 205m 11m 4024 S 6 1.3 0:21.16 X > 28518 root 20 0 56292 5652 4492 S 1 0.6 0:01.80 fluxbox > 1 root 20 0 3724 608 508 S 0 0.1 0:00.36 init > > So far I have no clue why C1E-idle in conjunction with virtual wire > mode causes this strange behaviour. > > ... and I start to think about the root cause of all this. > > I've performed similar tests under X with the IRQ0/INT0 configuration and > I did not see above symptoms. So lets fall back to the IRQ0/INT0 configuration on this box. This basically restores the dont-use-the-lapic-timer exception mechanism that was unconditional on this box prior commit 8750bf5 ("x86: add C1E aware idle function"). Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-07-08 21:12:26 +07:00
return 0;
}
/*
* ACPI offers an alternative platform interface model that removes
* ACPI hardware requirements for platforms that do not implement
* the PC Architecture.
*
* We initialize the Hardware-reduced ACPI model here:
*/
static void __init acpi_reduced_hw_init(void)
{
if (acpi_gbl_reduced_hardware) {
/*
* Override x86_init functions and bypass legacy pic
* in Hardware-reduced ACPI mode
*/
x86_init.timers.timer_init = x86_init_noop;
x86_init.irqs.pre_vector_init = x86_init_noop;
legacy_pic = &null_legacy_pic;
}
}
/*
* If your system is blacklisted here, but you find that acpi=force
* works for you, please contact linux-acpi@vger.kernel.org
*/
static const struct dmi_system_id acpi_dmi_table[] __initconst = {
/*
* Boxes that need ACPI disabled
*/
{
.callback = dmi_disable_acpi,
.ident = "IBM Thinkpad",
.matches = {
DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
DMI_MATCH(DMI_BOARD_NAME, "2629H1G"),
},
},
/*
* Boxes that need ACPI PCI IRQ routing disabled
*/
{
.callback = disable_acpi_irq,
.ident = "ASUS A7V",
.matches = {
DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC"),
DMI_MATCH(DMI_BOARD_NAME, "<A7V>"),
/* newer BIOS, Revision 1011, does work */
DMI_MATCH(DMI_BIOS_VERSION,
"ASUS A7V ACPI BIOS Revision 1007"),
},
},
{
/*
* Latest BIOS for IBM 600E (1.16) has bad pcinum
* for LPC bridge, which is needed for the PCI
* interrupt links to work. DSDT fix is in bug 5966.
* 2645, 2646 model numbers are shared with 600/600E/600X
*/
.callback = disable_acpi_irq,
.ident = "IBM Thinkpad 600 Series 2645",
.matches = {
DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
DMI_MATCH(DMI_BOARD_NAME, "2645"),
},
},
{
.callback = disable_acpi_irq,
.ident = "IBM Thinkpad 600 Series 2646",
.matches = {
DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
DMI_MATCH(DMI_BOARD_NAME, "2646"),
},
},
/*
* Boxes that need ACPI PCI IRQ routing and PCI scan disabled
*/
{ /* _BBN 0 bug */
.callback = disable_acpi_pci,
.ident = "ASUS PR-DLS",
.matches = {
DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
DMI_MATCH(DMI_BOARD_NAME, "PR-DLS"),
DMI_MATCH(DMI_BIOS_VERSION,
"ASUS PR-DLS ACPI BIOS Revision 1010"),
DMI_MATCH(DMI_BIOS_DATE, "03/21/2003")
},
},
{
.callback = disable_acpi_pci,
.ident = "Acer TravelMate 36x Laptop",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 360"),
},
},
{}
};
/* second table for DMI checks that should run after early-quirks */
static const struct dmi_system_id acpi_dmi_table_late[] __initconst = {
x86: fix C1E && nx6325 stability problem The problems are that, with the ACPI vs timer overring issue _fixed_, after using the box for some time (between several seconds and 1 hour, at random) processes get very high CPU loads (once I've got X using 107% of the CPU, for example) and the system becomes unresponsive, as though there were interrupts lost or something similar. Andreas Herrman reproduced similar problems: > Ok, now I've reproduced the stability problem. > - Using tip/master, > - reverting e38502eb8aa82314d5ab0eba45f50e6790dadd88 and > - applying your patch from this posting > http://marc.info/?l=linux-kernel&m=121539354224562&w=4 > > Starting X, firefox, gimp, tuxpaint and doing some drawing in tuxpaint > results in a slow system. Drawing is almost not possible anymore -- > Selections of new colors, cursors etc. is performed with huge delay > if it's performed at all. > > BTW, the code sets up timer IRQ as Virtual Wire IRQ: > > Jul 8 14:57:58 kodscha IO-APIC (apicid-pin) 2-22, 2-23 not connected. > Jul 8 14:57:58 kodscha ..TIMER: vector=0x30 apic1=0 pin1=2 apic2=-1 pin2=-1 > Jul 8 14:57:58 kodscha ...trying to set up timer as Virtual Wire IRQ... works. > > and both INT0 and INT2 of IOAPIC are masked: > > Jul 8 14:57:58 kodscha NR Dst Mask Trig IRR Pol Stat Dmod Deli Vect: > Jul 8 14:57:58 kodscha 00 000 1 0 0 0 0 0 0 00 > Jul 8 14:57:58 kodscha 01 003 0 0 0 0 0 1 1 31 > Jul 8 14:57:58 kodscha 02 003 1 0 0 0 0 0 0 30 > > I've also seen strange CPU utilization -- with syslog-ng: > > top - 15:33:06 up 35 min, 4 users, load average: 1.70, 0.68, 0.37 > Tasks: 64 total, 4 running, 60 sleeping, 0 stopped, 0 zombie > Cpu0 : 0.0%us,100.0%sy, 0.0%ni, 0.0%id, 0.0%wa, 0.0%hi, 0.0%si, 0.0%st > Cpu1 : 6.4%us, 87.2%sy, 0.0%ni, 5.8%id, 0.0%wa, 0.6%hi, 0.0%si, 0.0%st > Mem: 895384k total, 283568k used, 611816k free, 35492k buffers > Swap: 1959920k total, 0k used, 1959920k free, 163044k cached > > PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND > 4632 root 20 0 17216 800 580 S 104 0.1 0:34.22 syslog-ng > 28505 root 20 0 205m 11m 4024 S 6 1.3 0:21.16 X > 28518 root 20 0 56292 5652 4492 S 1 0.6 0:01.80 fluxbox > 1 root 20 0 3724 608 508 S 0 0.1 0:00.36 init > > So far I have no clue why C1E-idle in conjunction with virtual wire > mode causes this strange behaviour. > > ... and I start to think about the root cause of all this. > > I've performed similar tests under X with the IRQ0/INT0 configuration and > I did not see above symptoms. So lets fall back to the IRQ0/INT0 configuration on this box. This basically restores the dont-use-the-lapic-timer exception mechanism that was unconditional on this box prior commit 8750bf5 ("x86: add C1E aware idle function"). Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-07-08 21:12:26 +07:00
/*
* HP laptops which use a DSDT reporting as HP/SB400/10000,
* which includes some code which overrides all temperature
* trip points to 16C if the INTIN2 input of the I/O APIC
* is enabled. This input is incorrectly designated the
* ISA IRQ 0 via an interrupt source override even though
* it is wired to the output of the master 8259A and INTIN0
* is not connected at all. Force ignoring BIOS IRQ0
x86: fix C1E && nx6325 stability problem The problems are that, with the ACPI vs timer overring issue _fixed_, after using the box for some time (between several seconds and 1 hour, at random) processes get very high CPU loads (once I've got X using 107% of the CPU, for example) and the system becomes unresponsive, as though there were interrupts lost or something similar. Andreas Herrman reproduced similar problems: > Ok, now I've reproduced the stability problem. > - Using tip/master, > - reverting e38502eb8aa82314d5ab0eba45f50e6790dadd88 and > - applying your patch from this posting > http://marc.info/?l=linux-kernel&m=121539354224562&w=4 > > Starting X, firefox, gimp, tuxpaint and doing some drawing in tuxpaint > results in a slow system. Drawing is almost not possible anymore -- > Selections of new colors, cursors etc. is performed with huge delay > if it's performed at all. > > BTW, the code sets up timer IRQ as Virtual Wire IRQ: > > Jul 8 14:57:58 kodscha IO-APIC (apicid-pin) 2-22, 2-23 not connected. > Jul 8 14:57:58 kodscha ..TIMER: vector=0x30 apic1=0 pin1=2 apic2=-1 pin2=-1 > Jul 8 14:57:58 kodscha ...trying to set up timer as Virtual Wire IRQ... works. > > and both INT0 and INT2 of IOAPIC are masked: > > Jul 8 14:57:58 kodscha NR Dst Mask Trig IRR Pol Stat Dmod Deli Vect: > Jul 8 14:57:58 kodscha 00 000 1 0 0 0 0 0 0 00 > Jul 8 14:57:58 kodscha 01 003 0 0 0 0 0 1 1 31 > Jul 8 14:57:58 kodscha 02 003 1 0 0 0 0 0 0 30 > > I've also seen strange CPU utilization -- with syslog-ng: > > top - 15:33:06 up 35 min, 4 users, load average: 1.70, 0.68, 0.37 > Tasks: 64 total, 4 running, 60 sleeping, 0 stopped, 0 zombie > Cpu0 : 0.0%us,100.0%sy, 0.0%ni, 0.0%id, 0.0%wa, 0.0%hi, 0.0%si, 0.0%st > Cpu1 : 6.4%us, 87.2%sy, 0.0%ni, 5.8%id, 0.0%wa, 0.6%hi, 0.0%si, 0.0%st > Mem: 895384k total, 283568k used, 611816k free, 35492k buffers > Swap: 1959920k total, 0k used, 1959920k free, 163044k cached > > PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND > 4632 root 20 0 17216 800 580 S 104 0.1 0:34.22 syslog-ng > 28505 root 20 0 205m 11m 4024 S 6 1.3 0:21.16 X > 28518 root 20 0 56292 5652 4492 S 1 0.6 0:01.80 fluxbox > 1 root 20 0 3724 608 508 S 0 0.1 0:00.36 init > > So far I have no clue why C1E-idle in conjunction with virtual wire > mode causes this strange behaviour. > > ... and I start to think about the root cause of all this. > > I've performed similar tests under X with the IRQ0/INT0 configuration and > I did not see above symptoms. So lets fall back to the IRQ0/INT0 configuration on this box. This basically restores the dont-use-the-lapic-timer exception mechanism that was unconditional on this box prior commit 8750bf5 ("x86: add C1E aware idle function"). Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-07-08 21:12:26 +07:00
* override in that cases.
*/
x86 ACPI: Blacklist two HP machines with buggy BIOSes There is a bug in the BIOSes of some HP boxes with AMD Turions which connects IO-APIC pins with ACPI thermal trip points in such a way that if the state of the IO-APIC is not as expected by the (buggy) BIOS, the thermal trip points are set to insanely low values (usually all of them become 16 degrees Celsius). As a result, thermal throttling kicks in and knock the system down to its shoes. Unfortunately some of the recent IO-APIC changes made the bug show up. To prevent this from happening, blacklist machines that are known to be affected (nx6115 and 6715b in this particular case). This fixes http://bugzilla.kernel.org/show_bug.cgi?id=11516 listed as a regression from 2.6.26. On my box it was caused by: commit 691874fa96d6349a8b60f8ea9c2bae52ece79941 Author: Maciej W. Rozycki <macro@linux-mips.org> Date: Tue May 27 21:19:51 2008 +0100 x86: I/O APIC: timer through 8259A second-chance Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org> Signed-off-by: Ingo Molnar <mingo@elte.hu> and the whole story is described in this (huge) thread: http://marc.info/?l=linux-kernel&m=121358440508410&w=4 Matthew Garrett told us about that happening on the nx6125: http://marc.info/?l=linux-kernel&m=121396307411930&w=4 and then Maciej analysed the breakage on the basis of a DSDT from the nx6325: http://marc.info/?l=linux-kernel&m=121401068718826&w=4 As far as the Dmitry's and Jason's boxes are concerned, I recognized the symptoms and asked them to verify that the blacklisting helped. It appears that the buggy BIOS code has been copy-pasted to the entire range of machines, for no good reason. Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl> Tested-by: Dmitry Torokhov <dmitry.torokhov@gmail.com> Tested-by: Jason Vas Dias <jason.vas.dias@gmail.com> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-10-06 16:59:29 +07:00
{
.callback = dmi_ignore_irq0_timer_override,
.ident = "HP nx6115 laptop",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nx6115"),
},
},
x86: fix C1E && nx6325 stability problem The problems are that, with the ACPI vs timer overring issue _fixed_, after using the box for some time (between several seconds and 1 hour, at random) processes get very high CPU loads (once I've got X using 107% of the CPU, for example) and the system becomes unresponsive, as though there were interrupts lost or something similar. Andreas Herrman reproduced similar problems: > Ok, now I've reproduced the stability problem. > - Using tip/master, > - reverting e38502eb8aa82314d5ab0eba45f50e6790dadd88 and > - applying your patch from this posting > http://marc.info/?l=linux-kernel&m=121539354224562&w=4 > > Starting X, firefox, gimp, tuxpaint and doing some drawing in tuxpaint > results in a slow system. Drawing is almost not possible anymore -- > Selections of new colors, cursors etc. is performed with huge delay > if it's performed at all. > > BTW, the code sets up timer IRQ as Virtual Wire IRQ: > > Jul 8 14:57:58 kodscha IO-APIC (apicid-pin) 2-22, 2-23 not connected. > Jul 8 14:57:58 kodscha ..TIMER: vector=0x30 apic1=0 pin1=2 apic2=-1 pin2=-1 > Jul 8 14:57:58 kodscha ...trying to set up timer as Virtual Wire IRQ... works. > > and both INT0 and INT2 of IOAPIC are masked: > > Jul 8 14:57:58 kodscha NR Dst Mask Trig IRR Pol Stat Dmod Deli Vect: > Jul 8 14:57:58 kodscha 00 000 1 0 0 0 0 0 0 00 > Jul 8 14:57:58 kodscha 01 003 0 0 0 0 0 1 1 31 > Jul 8 14:57:58 kodscha 02 003 1 0 0 0 0 0 0 30 > > I've also seen strange CPU utilization -- with syslog-ng: > > top - 15:33:06 up 35 min, 4 users, load average: 1.70, 0.68, 0.37 > Tasks: 64 total, 4 running, 60 sleeping, 0 stopped, 0 zombie > Cpu0 : 0.0%us,100.0%sy, 0.0%ni, 0.0%id, 0.0%wa, 0.0%hi, 0.0%si, 0.0%st > Cpu1 : 6.4%us, 87.2%sy, 0.0%ni, 5.8%id, 0.0%wa, 0.6%hi, 0.0%si, 0.0%st > Mem: 895384k total, 283568k used, 611816k free, 35492k buffers > Swap: 1959920k total, 0k used, 1959920k free, 163044k cached > > PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND > 4632 root 20 0 17216 800 580 S 104 0.1 0:34.22 syslog-ng > 28505 root 20 0 205m 11m 4024 S 6 1.3 0:21.16 X > 28518 root 20 0 56292 5652 4492 S 1 0.6 0:01.80 fluxbox > 1 root 20 0 3724 608 508 S 0 0.1 0:00.36 init > > So far I have no clue why C1E-idle in conjunction with virtual wire > mode causes this strange behaviour. > > ... and I start to think about the root cause of all this. > > I've performed similar tests under X with the IRQ0/INT0 configuration and > I did not see above symptoms. So lets fall back to the IRQ0/INT0 configuration on this box. This basically restores the dont-use-the-lapic-timer exception mechanism that was unconditional on this box prior commit 8750bf5 ("x86: add C1E aware idle function"). Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-07-08 21:12:26 +07:00
{
.callback = dmi_ignore_irq0_timer_override,
.ident = "HP NX6125 laptop",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nx6125"),
},
},
{
.callback = dmi_ignore_irq0_timer_override,
.ident = "HP NX6325 laptop",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nx6325"),
},
},
x86 ACPI: Blacklist two HP machines with buggy BIOSes There is a bug in the BIOSes of some HP boxes with AMD Turions which connects IO-APIC pins with ACPI thermal trip points in such a way that if the state of the IO-APIC is not as expected by the (buggy) BIOS, the thermal trip points are set to insanely low values (usually all of them become 16 degrees Celsius). As a result, thermal throttling kicks in and knock the system down to its shoes. Unfortunately some of the recent IO-APIC changes made the bug show up. To prevent this from happening, blacklist machines that are known to be affected (nx6115 and 6715b in this particular case). This fixes http://bugzilla.kernel.org/show_bug.cgi?id=11516 listed as a regression from 2.6.26. On my box it was caused by: commit 691874fa96d6349a8b60f8ea9c2bae52ece79941 Author: Maciej W. Rozycki <macro@linux-mips.org> Date: Tue May 27 21:19:51 2008 +0100 x86: I/O APIC: timer through 8259A second-chance Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org> Signed-off-by: Ingo Molnar <mingo@elte.hu> and the whole story is described in this (huge) thread: http://marc.info/?l=linux-kernel&m=121358440508410&w=4 Matthew Garrett told us about that happening on the nx6125: http://marc.info/?l=linux-kernel&m=121396307411930&w=4 and then Maciej analysed the breakage on the basis of a DSDT from the nx6325: http://marc.info/?l=linux-kernel&m=121401068718826&w=4 As far as the Dmitry's and Jason's boxes are concerned, I recognized the symptoms and asked them to verify that the blacklisting helped. It appears that the buggy BIOS code has been copy-pasted to the entire range of machines, for no good reason. Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl> Tested-by: Dmitry Torokhov <dmitry.torokhov@gmail.com> Tested-by: Jason Vas Dias <jason.vas.dias@gmail.com> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-10-06 16:59:29 +07:00
{
.callback = dmi_ignore_irq0_timer_override,
.ident = "HP 6715b laptop",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq 6715b"),
},
},
{
.callback = dmi_ignore_irq0_timer_override,
.ident = "FUJITSU SIEMENS",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
DMI_MATCH(DMI_PRODUCT_NAME, "AMILO PRO V2030"),
},
},
{}
};
/*
* acpi_boot_table_init() and acpi_boot_init()
* called from setup_arch(), always.
* 1. checksums all tables
* 2. enumerates lapics
* 3. enumerates io-apics
*
* acpi_table_init() is separate to allow reading SRAT without
* other side effects.
*
* side effects of acpi_boot_init:
* acpi_lapic = 1 if LAPIC found
* acpi_ioapic = 1 if IOAPIC found
* if (acpi_lapic && acpi_ioapic) smp_found_config = 1;
* if acpi_blacklisted() acpi_disabled = 1;
* acpi_irq_model=...
* ...
*/
void __init acpi_boot_table_init(void)
{
dmi_check_system(acpi_dmi_table);
/*
* If acpi_disabled, bail out
*/
if (acpi_disabled)
return;
/*
* Initialize the ACPI boot-time table parser.
*/
if (acpi_table_init()) {
disable_acpi();
return;
}
acpi_table_parse(ACPI_SIG_BOOT, acpi_parse_sbf);
/*
* blacklist may disable ACPI entirely
*/
if (acpi_blacklisted()) {
if (acpi_force) {
printk(KERN_WARNING PREFIX "acpi=force override\n");
} else {
printk(KERN_WARNING PREFIX "Disabling ACPI support\n");
disable_acpi();
return;
}
}
}
int __init early_acpi_boot_init(void)
{
/*
* If acpi_disabled, bail out
*/
if (acpi_disabled)
return 1;
/*
* Process the Multiple APIC Description Table (MADT), if present
*/
early_acpi_process_madt();
/*
* Hardware-reduced ACPI mode initialization:
*/
acpi_reduced_hw_init();
return 0;
}
int __init acpi_boot_init(void)
{
/* those are executed after early-quirks are executed */
dmi_check_system(acpi_dmi_table_late);
/*
* If acpi_disabled, bail out
*/
if (acpi_disabled)
return 1;
acpi_table_parse(ACPI_SIG_BOOT, acpi_parse_sbf);
/*
* set sci_int and PM timer address
*/
acpi_table_parse(ACPI_SIG_FADT, acpi_parse_fadt);
/*
* Process the Multiple APIC Description Table (MADT), if present
*/
acpi_process_madt();
acpi_table_parse(ACPI_SIG_HPET, acpi_parse_hpet);
if (IS_ENABLED(CONFIG_ACPI_BGRT))
acpi_table_parse(ACPI_SIG_BGRT, acpi_parse_bgrt);
if (!acpi_noirq)
x86_init.pci.init = pci_acpi_init;
/* Do not enable ACPI SPCR console by default */
acpi_parse_spcr(earlycon_acpi_spcr_enable, false);
return 0;
}
static int __init parse_acpi(char *arg)
{
if (!arg)
return -EINVAL;
/* "acpi=off" disables both ACPI table parsing and interpreter */
if (strcmp(arg, "off") == 0) {
disable_acpi();
}
/* acpi=force to over-ride black-list */
else if (strcmp(arg, "force") == 0) {
acpi_force = 1;
acpi_disabled = 0;
}
/* acpi=strict disables out-of-spec workarounds */
else if (strcmp(arg, "strict") == 0) {
acpi_strict = 1;
}
/* acpi=rsdt use RSDT instead of XSDT */
else if (strcmp(arg, "rsdt") == 0) {
acpi_gbl_do_not_use_xsdt = TRUE;
}
/* "acpi=noirq" disables ACPI interrupt routing */
else if (strcmp(arg, "noirq") == 0) {
acpi_noirq_set();
}
/* "acpi=copy_dsdt" copys DSDT */
else if (strcmp(arg, "copy_dsdt") == 0) {
acpi_gbl_copy_dsdt_locally = 1;
}
/* "acpi=nocmcff" disables FF mode for corrected errors */
else if (strcmp(arg, "nocmcff") == 0) {
acpi_disable_cmcff = 1;
} else {
/* Core will printk when we return error. */
return -EINVAL;
}
return 0;
}
early_param("acpi", parse_acpi);
/* FIXME: Using pci= for an ACPI parameter is a travesty. */
static int __init parse_pci(char *arg)
{
if (arg && strcmp(arg, "noacpi") == 0)
acpi_disable_pci();
return 0;
}
early_param("pci", parse_pci);
int __init acpi_mps_check(void)
{
#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_X86_MPPARSE)
/* mptable code is not built-in*/
if (acpi_disabled || acpi_noirq) {
printk(KERN_WARNING "MPS support code is not built-in.\n"
"Using acpi=off or acpi=noirq or pci=noacpi "
"may have problem\n");
return 1;
}
#endif
return 0;
}
#ifdef CONFIG_X86_IO_APIC
static int __init parse_acpi_skip_timer_override(char *arg)
{
acpi_skip_timer_override = 1;
return 0;
}
early_param("acpi_skip_timer_override", parse_acpi_skip_timer_override);
static int __init parse_acpi_use_timer_override(char *arg)
{
acpi_use_timer_override = 1;
return 0;
}
early_param("acpi_use_timer_override", parse_acpi_use_timer_override);
#endif /* CONFIG_X86_IO_APIC */
static int __init setup_acpi_sci(char *s)
{
if (!s)
return -EINVAL;
if (!strcmp(s, "edge"))
acpi_sci_flags = ACPI_MADT_TRIGGER_EDGE |
(acpi_sci_flags & ~ACPI_MADT_TRIGGER_MASK);
else if (!strcmp(s, "level"))
acpi_sci_flags = ACPI_MADT_TRIGGER_LEVEL |
(acpi_sci_flags & ~ACPI_MADT_TRIGGER_MASK);
else if (!strcmp(s, "high"))
acpi_sci_flags = ACPI_MADT_POLARITY_ACTIVE_HIGH |
(acpi_sci_flags & ~ACPI_MADT_POLARITY_MASK);
else if (!strcmp(s, "low"))
acpi_sci_flags = ACPI_MADT_POLARITY_ACTIVE_LOW |
(acpi_sci_flags & ~ACPI_MADT_POLARITY_MASK);
else
return -EINVAL;
return 0;
}
early_param("acpi_sci", setup_acpi_sci);
int __acpi_acquire_global_lock(unsigned int *lock)
{
unsigned int old, new, val;
do {
old = *lock;
new = (((old & ~0x3) + 2) + ((old >> 1) & 0x1));
val = cmpxchg(lock, old, new);
} while (unlikely (val != old));
return (new < 3) ? -1 : 0;
}
int __acpi_release_global_lock(unsigned int *lock)
{
unsigned int old, new, val;
do {
old = *lock;
new = old & ~0x3;
val = cmpxchg(lock, old, new);
} while (unlikely (val != old));
return old & 0x1;
}
void __init arch_reserve_mem_area(acpi_physical_address addr, size_t size)
{
e820__range_add(addr, size, E820_TYPE_ACPI);
e820__update_table_print();
}