mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-02 15:36:42 +07:00
Fix the code to tell when a CMCI storm ends by actually
looking at the machine check banks when we poll while interrupts are disabled. -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.14 (GNU/Linux) iQIcBAABAgAGBQJTOZzGAAoJEKurIx+X31iBD6IP/2zd94hyif6gl8qGEqRUO0Ts 3Bo+P8DhFXH7VY5qvN9dP4raOfQjb8B2pFCShq6c4nr1lv6AKb4Gehf9EcQnXF20 z10vZuKGyJfyck0dKr3xE35V34DAARNoCJCWoSwutbqEaCQT/7J/cymkgdqUgHo9 f7XIeMy51dAVw7IeYleNLlVdDet1s5BuCPVdJOIqirGA9qfdDUUVuYmBB/mrbneI uftl27zd+KM4tcuNkVxrsRbZ8q/IOu1mhtb09aDpeKLLRfl282XsLr1j+52lbM8t RPieFO56jGlZn5015aWNTXARqhdKQ8wnUzttEJCGkl+p7bqNp3pQmpbNUD2ZpJF8 euR89OaxaOmU8Y1anfw/pA1G7WumkTG9WjOHRQFzSERPq1GF+YLlkaSwVTkgT9Ny 6ubvGr9WYPTxzNeDqUeu4HjCKOS1soWak7zIoXbu2Yu59scCcITE/cKw3y76BZ2O 2gKkBmBxlaZt8DBmPR2W4A+Uhb7lhJWvrYdCwpnrMA0nDCKOEDs5g5MVr5xMAEJ4 Zy1X6hwOwvmOmJp/DdHrsmfsRhCRV/itjb0vpfEF+MzWYbECD8Zyslb6YP5E5ZoV eWoKglC/W25907YJZiX2yzsCi+GNjLO/0V5ZJHwCxrWPSUnSJMeXjgbslIEQyOlC 0Gkj2DUIrgsGJDhwTmAJ =ZqzJ -----END PGP SIGNATURE----- Merge tag 'please-pull-cmci-storm' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras into x86/urgent Pull RAS/CMCI storm code fix from Tony Luck: "Fix the code to tell when a CMCI storm ends by actually looking at the machine check banks when we poll while interrupts are disabled." Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
commit
b8c89c6a0d
@ -89,6 +89,9 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait);
|
||||
static DEFINE_PER_CPU(struct mce, mces_seen);
|
||||
static int cpu_missing;
|
||||
|
||||
/* CMCI storm detection filter */
|
||||
static DEFINE_PER_CPU(unsigned long, mce_polled_error);
|
||||
|
||||
/*
|
||||
* MCA banks polled by the period polling timer for corrected events.
|
||||
* With Intel CMCI, this only has MCA banks which do not support CMCI (if any).
|
||||
@ -595,6 +598,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
|
||||
{
|
||||
struct mce m;
|
||||
int i;
|
||||
unsigned long *v;
|
||||
|
||||
this_cpu_inc(mce_poll_count);
|
||||
|
||||
@ -614,6 +618,8 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
|
||||
if (!(m.status & MCI_STATUS_VAL))
|
||||
continue;
|
||||
|
||||
v = &get_cpu_var(mce_polled_error);
|
||||
set_bit(0, v);
|
||||
/*
|
||||
* Uncorrected or signalled events are handled by the exception
|
||||
* handler when it is enabled, so don't process those here.
|
||||
@ -1278,10 +1284,18 @@ static unsigned long mce_adjust_timer_default(unsigned long interval)
|
||||
static unsigned long (*mce_adjust_timer)(unsigned long interval) =
|
||||
mce_adjust_timer_default;
|
||||
|
||||
static int cmc_error_seen(void)
|
||||
{
|
||||
unsigned long *v = &__get_cpu_var(mce_polled_error);
|
||||
|
||||
return test_and_clear_bit(0, v);
|
||||
}
|
||||
|
||||
static void mce_timer_fn(unsigned long data)
|
||||
{
|
||||
struct timer_list *t = &__get_cpu_var(mce_timer);
|
||||
unsigned long iv;
|
||||
int notify;
|
||||
|
||||
WARN_ON(smp_processor_id() != data);
|
||||
|
||||
@ -1296,7 +1310,9 @@ static void mce_timer_fn(unsigned long data)
|
||||
* polling interval, otherwise increase the polling interval.
|
||||
*/
|
||||
iv = __this_cpu_read(mce_next_interval);
|
||||
if (mce_notify_irq()) {
|
||||
notify = mce_notify_irq();
|
||||
notify |= cmc_error_seen();
|
||||
if (notify) {
|
||||
iv = max(iv / 2, (unsigned long) HZ/100);
|
||||
} else {
|
||||
iv = min(iv * 2, round_jiffies_relative(check_interval * HZ));
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/cpumask.h>
|
||||
#include <asm/apic.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/msr.h>
|
||||
@ -137,6 +138,22 @@ unsigned long mce_intel_adjust_timer(unsigned long interval)
|
||||
}
|
||||
}
|
||||
|
||||
static void cmci_storm_disable_banks(void)
|
||||
{
|
||||
unsigned long flags, *owned;
|
||||
int bank;
|
||||
u64 val;
|
||||
|
||||
raw_spin_lock_irqsave(&cmci_discover_lock, flags);
|
||||
owned = __get_cpu_var(mce_banks_owned);
|
||||
for_each_set_bit(bank, owned, MAX_NR_BANKS) {
|
||||
rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
|
||||
val &= ~MCI_CTL2_CMCI_EN;
|
||||
wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
|
||||
}
|
||||
raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
|
||||
}
|
||||
|
||||
static bool cmci_storm_detect(void)
|
||||
{
|
||||
unsigned int cnt = __this_cpu_read(cmci_storm_cnt);
|
||||
@ -158,7 +175,7 @@ static bool cmci_storm_detect(void)
|
||||
if (cnt <= CMCI_STORM_THRESHOLD)
|
||||
return false;
|
||||
|
||||
cmci_clear();
|
||||
cmci_storm_disable_banks();
|
||||
__this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE);
|
||||
r = atomic_add_return(1, &cmci_storm_on_cpus);
|
||||
mce_timer_kick(CMCI_POLL_INTERVAL);
|
||||
|
Loading…
Reference in New Issue
Block a user