2005-04-17 05:20:36 +07:00
|
|
|
/*
|
|
|
|
* Intel specific MCE features.
|
|
|
|
* Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
|
2009-02-12 19:49:36 +07:00
|
|
|
* Copyright (C) 2008, 2009 Intel Corporation
|
|
|
|
* Author: Andi Kleen
|
2005-04-17 05:20:36 +07:00
|
|
|
*/
|
|
|
|
|
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h
percpu.h is included by sched.h and module.h and thus ends up being
included when building most .c files. percpu.h includes slab.h which
in turn includes gfp.h making everything defined by the two files
universally available and complicating inclusion dependencies.
percpu.h -> slab.h dependency is about to be removed. Prepare for
this change by updating users of gfp and slab facilities include those
headers directly instead of assuming availability. As this conversion
needs to touch large number of source files, the following script is
used as the basis of conversion.
http://userweb.kernel.org/~tj/misc/slabh-sweep.py
The script does the following.
* Scan files for gfp and slab usages and update includes such that
only the necessary includes are there. ie. if only gfp is used,
gfp.h, if slab is used, slab.h.
* When the script inserts a new include, it looks at the include
blocks and try to put the new include such that its order conforms
to its surrounding. It's put in the include block which contains
core kernel includes, in the same order that the rest are ordered -
alphabetical, Christmas tree, rev-Xmas-tree or at the end if there
doesn't seem to be any matching order.
* If the script can't find a place to put a new include (mostly
because the file doesn't have fitting include block), it prints out
an error message indicating which .h file needs to be added to the
file.
The conversion was done in the following steps.
1. The initial automatic conversion of all .c files updated slightly
over 4000 files, deleting around 700 includes and adding ~480 gfp.h
and ~3000 slab.h inclusions. The script emitted errors for ~400
files.
2. Each error was manually checked. Some didn't need the inclusion,
some needed manual addition while adding it to implementation .h or
embedding .c file was more appropriate for others. This step added
inclusions to around 150 files.
3. The script was run again and the output was compared to the edits
from #2 to make sure no file was left behind.
4. Several build tests were done and a couple of problems were fixed.
e.g. lib/decompress_*.c used malloc/free() wrappers around slab
APIs requiring slab.h to be added manually.
5. The script was run on all .h files but without automatically
editing them as sprinkling gfp.h and slab.h inclusions around .h
files could easily lead to inclusion dependency hell. Most gfp.h
inclusion directives were ignored as stuff from gfp.h was usually
widely available and often used in preprocessor macros. Each
slab.h inclusion directive was examined and added manually as
necessary.
6. percpu.h was updated not to include slab.h.
7. Build tests were done on the following configurations and failures
were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my
distributed build env didn't work with gcov compiles) and a few
more options had to be turned off depending on archs to make things
build (like ipr on powerpc/64 which failed due to missing writeq).
* x86 and x86_64 UP and SMP allmodconfig and a custom test config.
* powerpc and powerpc64 SMP allmodconfig
* sparc and sparc64 SMP allmodconfig
* ia64 SMP allmodconfig
* s390 SMP allmodconfig
* alpha SMP allmodconfig
* um on x86_64 SMP allmodconfig
8. percpu.h modifications were reverted so that it could be applied as
a separate patch and serve as bisection point.
Given the fact that I had only a couple of failures from tests on step
6, I'm fairly confident about the coverage of this conversion patch.
If there is a breakage, it's likely to be something in one of the arch
headers which should be easily discoverable on most builds of
the specific arch.
Signed-off-by: Tejun Heo <tj@kernel.org>
Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 15:04:11 +07:00
|
|
|
#include <linux/gfp.h>
|
2005-04-17 05:20:36 +07:00
|
|
|
#include <linux/init.h>
|
|
|
|
#include <linux/interrupt.h>
|
|
|
|
#include <linux/percpu.h>
|
2009-10-07 20:09:06 +07:00
|
|
|
#include <linux/sched.h>
|
2009-06-17 22:31:15 +07:00
|
|
|
#include <asm/apic.h>
|
2005-04-17 05:20:36 +07:00
|
|
|
#include <asm/processor.h>
|
|
|
|
#include <asm/msr.h>
|
|
|
|
#include <asm/mce.h>
|
|
|
|
|
2012-08-10 01:44:51 +07:00
|
|
|
#include "mce-internal.h"
|
|
|
|
|
2009-02-12 19:49:36 +07:00
|
|
|
/*
|
|
|
|
* Support for Intel Corrected Machine Check Interrupts (CMCI). This allows
|
|
|
|
* the CPU to raise an interrupt when a corrected machine check happened.
|
|
|
|
* Normally we pick those up using a regular polling timer.
|
|
|
|
* Also supports reliable discovery of shared banks.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
 * Per-CPU bitmap of the MCE banks this CPU has claimed for CMCI: bits are
 * set in cmci_discover() and cleared again in cmci_clear().
 */
static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* cmci_discover_lock protects against parallel discovery attempts
|
|
|
|
* which could race against each other.
|
|
|
|
*/
|
2010-07-15 19:28:02 +07:00
|
|
|
/* Taken with interrupts disabled by both cmci_discover() and cmci_clear(). */
static DEFINE_RAW_SPINLOCK(cmci_discover_lock);
|
2009-02-12 19:49:36 +07:00
|
|
|
|
2012-08-10 01:44:51 +07:00
|
|
|
#define CMCI_THRESHOLD 1
|
|
|
|
#define CMCI_POLL_INTERVAL (30 * HZ)
|
|
|
|
#define CMCI_STORM_INTERVAL (1 * HZ)
|
|
|
|
#define CMCI_STORM_THRESHOLD 15
|
|
|
|
|
|
|
|
static DEFINE_PER_CPU(unsigned long, cmci_time_stamp);
|
|
|
|
static DEFINE_PER_CPU(unsigned int, cmci_storm_cnt);
|
|
|
|
static DEFINE_PER_CPU(unsigned int, cmci_storm_state);
|
|
|
|
|
|
|
|
enum {
|
|
|
|
CMCI_STORM_NONE,
|
|
|
|
CMCI_STORM_ACTIVE,
|
|
|
|
CMCI_STORM_SUBSIDED,
|
|
|
|
};
|
|
|
|
|
|
|
|
static atomic_t cmci_storm_on_cpus;
|
2009-02-12 19:49:36 +07:00
|
|
|
|
2009-02-25 04:19:02 +07:00
|
|
|
static int cmci_supported(int *banks)
|
2009-02-12 19:49:36 +07:00
|
|
|
{
|
|
|
|
u64 cap;
|
|
|
|
|
2012-10-16 01:25:17 +07:00
|
|
|
if (mca_cfg.cmci_disabled || mca_cfg.ignore_ce)
|
2009-06-11 14:06:07 +07:00
|
|
|
return 0;
|
|
|
|
|
2009-02-12 19:49:36 +07:00
|
|
|
/*
|
|
|
|
* Vendor check is not strictly needed, but the initial
|
|
|
|
* initialization is vendor keyed and this
|
|
|
|
* makes sure none of the backdoors are entered otherwise.
|
|
|
|
*/
|
|
|
|
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
|
|
|
|
return 0;
|
|
|
|
if (!cpu_has_apic || lapic_get_maxlvt() < 6)
|
|
|
|
return 0;
|
|
|
|
rdmsrl(MSR_IA32_MCG_CAP, cap);
|
|
|
|
*banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);
|
|
|
|
return !!(cap & MCG_CMCI_P);
|
|
|
|
}
|
|
|
|
|
2012-08-10 01:44:51 +07:00
|
|
|
void mce_intel_cmci_poll(void)
|
|
|
|
{
|
|
|
|
if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE)
|
|
|
|
return;
|
|
|
|
machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
|
|
|
|
}
|
|
|
|
|
|
|
|
void mce_intel_hcpu_update(unsigned long cpu)
|
|
|
|
{
|
|
|
|
if (per_cpu(cmci_storm_state, cpu) == CMCI_STORM_ACTIVE)
|
|
|
|
atomic_dec(&cmci_storm_on_cpus);
|
|
|
|
|
|
|
|
per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE;
|
|
|
|
}
|
|
|
|
|
|
|
|
unsigned long mce_intel_adjust_timer(unsigned long interval)
|
|
|
|
{
|
|
|
|
int r;
|
|
|
|
|
|
|
|
if (interval < CMCI_POLL_INTERVAL)
|
|
|
|
return interval;
|
|
|
|
|
|
|
|
switch (__this_cpu_read(cmci_storm_state)) {
|
|
|
|
case CMCI_STORM_ACTIVE:
|
|
|
|
/*
|
|
|
|
* We switch back to interrupt mode once the poll timer has
|
|
|
|
* silenced itself. That means no events recorded and the
|
|
|
|
* timer interval is back to our poll interval.
|
|
|
|
*/
|
|
|
|
__this_cpu_write(cmci_storm_state, CMCI_STORM_SUBSIDED);
|
|
|
|
r = atomic_sub_return(1, &cmci_storm_on_cpus);
|
|
|
|
if (r == 0)
|
|
|
|
pr_notice("CMCI storm subsided: switching to interrupt mode\n");
|
|
|
|
/* FALLTHROUGH */
|
|
|
|
|
|
|
|
case CMCI_STORM_SUBSIDED:
|
|
|
|
/*
|
|
|
|
* We wait for all cpus to go back to SUBSIDED
|
|
|
|
* state. When that happens we switch back to
|
|
|
|
* interrupt mode.
|
|
|
|
*/
|
|
|
|
if (!atomic_read(&cmci_storm_on_cpus)) {
|
|
|
|
__this_cpu_write(cmci_storm_state, CMCI_STORM_NONE);
|
|
|
|
cmci_reenable();
|
|
|
|
cmci_recheck();
|
|
|
|
}
|
|
|
|
return CMCI_POLL_INTERVAL;
|
|
|
|
default:
|
|
|
|
/*
|
|
|
|
* We have shiny weather. Let the poll do whatever it
|
|
|
|
* thinks.
|
|
|
|
*/
|
|
|
|
return interval;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool cmci_storm_detect(void)
|
|
|
|
{
|
|
|
|
unsigned int cnt = __this_cpu_read(cmci_storm_cnt);
|
|
|
|
unsigned long ts = __this_cpu_read(cmci_time_stamp);
|
|
|
|
unsigned long now = jiffies;
|
|
|
|
int r;
|
|
|
|
|
|
|
|
if (__this_cpu_read(cmci_storm_state) != CMCI_STORM_NONE)
|
|
|
|
return true;
|
|
|
|
|
|
|
|
if (time_before_eq(now, ts + CMCI_STORM_INTERVAL)) {
|
|
|
|
cnt++;
|
|
|
|
} else {
|
|
|
|
cnt = 1;
|
|
|
|
__this_cpu_write(cmci_time_stamp, now);
|
|
|
|
}
|
|
|
|
__this_cpu_write(cmci_storm_cnt, cnt);
|
|
|
|
|
|
|
|
if (cnt <= CMCI_STORM_THRESHOLD)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
cmci_clear();
|
|
|
|
__this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE);
|
|
|
|
r = atomic_add_return(1, &cmci_storm_on_cpus);
|
|
|
|
mce_timer_kick(CMCI_POLL_INTERVAL);
|
|
|
|
|
|
|
|
if (r == 1)
|
|
|
|
pr_notice("CMCI storm detected: switching to poll mode\n");
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2009-02-12 19:49:36 +07:00
|
|
|
/*
|
|
|
|
* The interrupt handler. This is called on every event.
|
|
|
|
* Just call the poller directly to log any events.
|
|
|
|
* This could in theory increase the threshold under high load,
|
|
|
|
* but doesn't for now.
|
|
|
|
*/
|
|
|
|
static void intel_threshold_interrupt(void)
|
|
|
|
{
|
2012-08-10 01:44:51 +07:00
|
|
|
if (cmci_storm_detect())
|
|
|
|
return;
|
2009-02-12 19:49:36 +07:00
|
|
|
machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
|
2009-05-28 02:56:58 +07:00
|
|
|
mce_notify_irq();
|
2009-02-12 19:49:36 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
|
|
|
|
* on this CPU. Use the algorithm recommended in the SDM to discover shared
|
|
|
|
* banks.
|
|
|
|
*/
|
2012-08-10 00:59:21 +07:00
|
|
|
static void cmci_discover(int banks)
|
2009-02-12 19:49:36 +07:00
|
|
|
{
|
|
|
|
unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned);
|
2009-05-08 15:28:40 +07:00
|
|
|
unsigned long flags;
|
2009-02-12 19:49:36 +07:00
|
|
|
int i;
|
2012-09-28 00:08:00 +07:00
|
|
|
int bios_wrong_thresh = 0;
|
2009-02-12 19:49:36 +07:00
|
|
|
|
2010-07-15 19:28:02 +07:00
|
|
|
raw_spin_lock_irqsave(&cmci_discover_lock, flags);
|
2009-02-12 19:49:36 +07:00
|
|
|
for (i = 0; i < banks; i++) {
|
|
|
|
u64 val;
|
2012-09-28 00:08:00 +07:00
|
|
|
int bios_zero_thresh = 0;
|
2009-02-12 19:49:36 +07:00
|
|
|
|
|
|
|
if (test_bit(i, owned))
|
|
|
|
continue;
|
|
|
|
|
2009-07-09 05:31:44 +07:00
|
|
|
rdmsrl(MSR_IA32_MCx_CTL2(i), val);
|
2009-02-12 19:49:36 +07:00
|
|
|
|
|
|
|
/* Already owned by someone else? */
|
2010-06-08 13:09:08 +07:00
|
|
|
if (val & MCI_CTL2_CMCI_EN) {
|
2012-08-10 00:59:21 +07:00
|
|
|
clear_bit(i, owned);
|
2009-02-12 19:49:36 +07:00
|
|
|
__clear_bit(i, __get_cpu_var(mce_poll_banks));
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2012-09-28 00:08:00 +07:00
|
|
|
if (!mce_bios_cmci_threshold) {
|
|
|
|
val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
|
|
|
|
val |= CMCI_THRESHOLD;
|
|
|
|
} else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) {
|
|
|
|
/*
|
|
|
|
* If bios_cmci_threshold boot option was specified
|
|
|
|
* but the threshold is zero, we'll try to initialize
|
|
|
|
* it to 1.
|
|
|
|
*/
|
|
|
|
bios_zero_thresh = 1;
|
|
|
|
val |= CMCI_THRESHOLD;
|
|
|
|
}
|
|
|
|
|
|
|
|
val |= MCI_CTL2_CMCI_EN;
|
2009-07-09 05:31:44 +07:00
|
|
|
wrmsrl(MSR_IA32_MCx_CTL2(i), val);
|
|
|
|
rdmsrl(MSR_IA32_MCx_CTL2(i), val);
|
2009-02-12 19:49:36 +07:00
|
|
|
|
|
|
|
/* Did the enable bit stick? -- the bank supports CMCI */
|
2010-06-08 13:09:08 +07:00
|
|
|
if (val & MCI_CTL2_CMCI_EN) {
|
2012-08-10 00:59:21 +07:00
|
|
|
set_bit(i, owned);
|
2009-02-12 19:49:36 +07:00
|
|
|
__clear_bit(i, __get_cpu_var(mce_poll_banks));
|
2012-09-28 00:08:00 +07:00
|
|
|
/*
|
|
|
|
* We are able to set thresholds for some banks that
|
|
|
|
* had a threshold of 0. This means the BIOS has not
|
|
|
|
* set the thresholds properly or does not work with
|
|
|
|
* this boot option. Note down now and report later.
|
|
|
|
*/
|
|
|
|
if (mce_bios_cmci_threshold && bios_zero_thresh &&
|
|
|
|
(val & MCI_CTL2_CMCI_THRESHOLD_MASK))
|
|
|
|
bios_wrong_thresh = 1;
|
2009-02-12 19:49:36 +07:00
|
|
|
} else {
|
|
|
|
WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks)));
|
|
|
|
}
|
|
|
|
}
|
2010-07-15 19:28:02 +07:00
|
|
|
raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
|
2012-09-28 00:08:00 +07:00
|
|
|
if (mce_bios_cmci_threshold && bios_wrong_thresh) {
|
|
|
|
pr_info_once(
|
|
|
|
"bios_cmci_threshold: Some banks do not have valid thresholds set\n");
|
|
|
|
pr_info_once(
|
|
|
|
"bios_cmci_threshold: Make sure your BIOS supports this boot option\n");
|
|
|
|
}
|
2009-02-12 19:49:36 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Just in case we missed an event during initialization check
|
|
|
|
* all the CMCI owned banks.
|
|
|
|
*/
|
2009-02-25 04:19:02 +07:00
|
|
|
void cmci_recheck(void)
|
2009-02-12 19:49:36 +07:00
|
|
|
{
|
|
|
|
unsigned long flags;
|
|
|
|
int banks;
|
|
|
|
|
2010-12-18 22:30:05 +07:00
|
|
|
if (!mce_available(__this_cpu_ptr(&cpu_info)) || !cmci_supported(&banks))
|
2009-02-12 19:49:36 +07:00
|
|
|
return;
|
|
|
|
local_irq_save(flags);
|
|
|
|
machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
|
|
|
|
local_irq_restore(flags);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Disable CMCI on this CPU for all banks it owns when it goes down.
|
|
|
|
* This allows other CPUs to claim the banks on rediscovery.
|
|
|
|
*/
|
2009-02-25 04:19:02 +07:00
|
|
|
void cmci_clear(void)
|
2009-02-12 19:49:36 +07:00
|
|
|
{
|
2009-05-08 15:28:40 +07:00
|
|
|
unsigned long flags;
|
2009-02-12 19:49:36 +07:00
|
|
|
int i;
|
|
|
|
int banks;
|
|
|
|
u64 val;
|
|
|
|
|
|
|
|
if (!cmci_supported(&banks))
|
|
|
|
return;
|
2010-07-15 19:28:02 +07:00
|
|
|
raw_spin_lock_irqsave(&cmci_discover_lock, flags);
|
2009-02-12 19:49:36 +07:00
|
|
|
for (i = 0; i < banks; i++) {
|
|
|
|
if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
|
|
|
|
continue;
|
|
|
|
/* Disable CMCI */
|
2009-07-09 05:31:44 +07:00
|
|
|
rdmsrl(MSR_IA32_MCx_CTL2(i), val);
|
2012-09-28 00:08:00 +07:00
|
|
|
val &= ~MCI_CTL2_CMCI_EN;
|
2009-07-09 05:31:44 +07:00
|
|
|
wrmsrl(MSR_IA32_MCx_CTL2(i), val);
|
2009-02-12 19:49:36 +07:00
|
|
|
__clear_bit(i, __get_cpu_var(mce_banks_owned));
|
|
|
|
}
|
2010-07-15 19:28:02 +07:00
|
|
|
raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
|
2009-02-12 19:49:36 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* After a CPU went down cycle through all the others and rediscover
|
|
|
|
* Must run in process context.
|
|
|
|
*/
|
2009-02-25 04:19:02 +07:00
|
|
|
void cmci_rediscover(int dying)
|
2009-02-12 19:49:36 +07:00
|
|
|
{
|
|
|
|
int banks;
|
|
|
|
int cpu;
|
|
|
|
cpumask_var_t old;
|
|
|
|
|
|
|
|
if (!cmci_supported(&banks))
|
|
|
|
return;
|
|
|
|
if (!alloc_cpumask_var(&old, GFP_KERNEL))
|
|
|
|
return;
|
|
|
|
cpumask_copy(old, ¤t->cpus_allowed);
|
|
|
|
|
2009-04-14 15:09:04 +07:00
|
|
|
for_each_online_cpu(cpu) {
|
2009-02-12 19:49:36 +07:00
|
|
|
if (cpu == dying)
|
|
|
|
continue;
|
2009-03-13 11:19:54 +07:00
|
|
|
if (set_cpus_allowed_ptr(current, cpumask_of(cpu)))
|
2009-02-12 19:49:36 +07:00
|
|
|
continue;
|
|
|
|
/* Recheck banks in case CPUs don't all have the same */
|
|
|
|
if (cmci_supported(&banks))
|
2012-08-10 00:59:21 +07:00
|
|
|
cmci_discover(banks);
|
2009-02-12 19:49:36 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
set_cpus_allowed_ptr(current, old);
|
|
|
|
free_cpumask_var(old);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Reenable CMCI on this CPU, e.g. in case a CPU down failed, by running
 * bank discovery again. Does nothing when CMCI is not supported.
 */
void cmci_reenable(void)
{
	int nr_banks;

	if (!cmci_supported(&nr_banks))
		return;

	cmci_discover(nr_banks);
}
|
|
|
|
|
2009-03-16 15:07:33 +07:00
|
|
|
static void intel_init_cmci(void)
|
2009-02-12 19:49:36 +07:00
|
|
|
{
|
|
|
|
int banks;
|
|
|
|
|
|
|
|
if (!cmci_supported(&banks))
|
|
|
|
return;
|
|
|
|
|
|
|
|
mce_threshold_vector = intel_threshold_interrupt;
|
2012-08-10 00:59:21 +07:00
|
|
|
cmci_discover(banks);
|
2009-02-12 19:49:36 +07:00
|
|
|
/*
|
|
|
|
* For CPU #0 this runs with still disabled APIC, but that's
|
|
|
|
* ok because only the vector is set up. We still do another
|
|
|
|
* check for the banks later for CPU #0 just to make sure
|
|
|
|
* to not miss any events.
|
|
|
|
*/
|
|
|
|
apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED);
|
|
|
|
cmci_recheck();
|
|
|
|
}
|
|
|
|
|
2009-02-21 14:35:51 +07:00
|
|
|
/*
 * Initialize the Intel specific MCE features for the CPU described by @c:
 * first the thermal setup, then CMCI.
 */
void mce_intel_feature_init(struct cpuinfo_x86 *c)
{
	intel_init_thermal(c);
	intel_init_cmci();
}
|