mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-11-24 21:30:53 +07:00
Merge branch 'x86-mce-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'x86-mce-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  x86, mce: Use HW_ERR in MCE handler
  x86, mce: Add HW_ERR printk prefix for hardware error logging
  x86, mce: Fix MSR_IA32_MCI_CTL2 CMCI threshold setup
  x86, mce: Rename MSR_IA32_MCx_CTL2 value
This commit is contained in:
commit
e8779776af
@ -38,6 +38,10 @@
|
||||
#define MCM_ADDR_MEM 3 /* memory address */
|
||||
#define MCM_ADDR_GENERIC 7 /* generic */
|
||||
|
||||
/* CTL2 register defines */
|
||||
#define MCI_CTL2_CMCI_EN (1ULL << 30)
|
||||
#define MCI_CTL2_CMCI_THRESHOLD_MASK 0x7fffULL
|
||||
|
||||
#define MCJ_CTX_MASK 3
|
||||
#define MCJ_CTX(flags) ((flags) & MCJ_CTX_MASK)
|
||||
#define MCJ_CTX_RANDOM 0 /* inject context: random */
|
||||
|
@ -96,9 +96,6 @@
|
||||
#define MSR_IA32_MC0_CTL2 0x00000280
|
||||
#define MSR_IA32_MCx_CTL2(x) (MSR_IA32_MC0_CTL2 + (x))
|
||||
|
||||
#define CMCI_EN (1ULL << 30)
|
||||
#define CMCI_THRESHOLD_MASK 0xffffULL
|
||||
|
||||
#define MSR_P6_PERFCTR0 0x000000c1
|
||||
#define MSR_P6_PERFCTR1 0x000000c2
|
||||
#define MSR_P6_EVNTSEL0 0x00000186
|
||||
|
@ -107,8 +107,8 @@ EXPORT_SYMBOL_GPL(x86_mce_decoder_chain);
|
||||
static int default_decode_mce(struct notifier_block *nb, unsigned long val,
|
||||
void *data)
|
||||
{
|
||||
pr_emerg("No human readable MCE decoding support on this CPU type.\n");
|
||||
pr_emerg("Run the message through 'mcelog --ascii' to decode.\n");
|
||||
pr_emerg(HW_ERR "No human readable MCE decoding support on this CPU type.\n");
|
||||
pr_emerg(HW_ERR "Run the message through 'mcelog --ascii' to decode.\n");
|
||||
|
||||
return NOTIFY_STOP;
|
||||
}
|
||||
@ -211,11 +211,11 @@ void mce_log(struct mce *mce)
|
||||
|
||||
static void print_mce(struct mce *m)
|
||||
{
|
||||
pr_emerg("CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n",
|
||||
pr_emerg(HW_ERR "CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n",
|
||||
m->extcpu, m->mcgstatus, m->bank, m->status);
|
||||
|
||||
if (m->ip) {
|
||||
pr_emerg("RIP%s %02x:<%016Lx> ",
|
||||
pr_emerg(HW_ERR "RIP%s %02x:<%016Lx> ",
|
||||
!(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
|
||||
m->cs, m->ip);
|
||||
|
||||
@ -224,14 +224,14 @@ static void print_mce(struct mce *m)
|
||||
pr_cont("\n");
|
||||
}
|
||||
|
||||
pr_emerg("TSC %llx ", m->tsc);
|
||||
pr_emerg(HW_ERR "TSC %llx ", m->tsc);
|
||||
if (m->addr)
|
||||
pr_cont("ADDR %llx ", m->addr);
|
||||
if (m->misc)
|
||||
pr_cont("MISC %llx ", m->misc);
|
||||
|
||||
pr_cont("\n");
|
||||
pr_emerg("PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n",
|
||||
pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n",
|
||||
m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid);
|
||||
|
||||
/*
|
||||
@ -241,16 +241,6 @@ static void print_mce(struct mce *m)
|
||||
atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m);
|
||||
}
|
||||
|
||||
static void print_mce_head(void)
|
||||
{
|
||||
pr_emerg("\nHARDWARE ERROR\n");
|
||||
}
|
||||
|
||||
static void print_mce_tail(void)
|
||||
{
|
||||
pr_emerg("This is not a software problem!\n");
|
||||
}
|
||||
|
||||
#define PANIC_TIMEOUT 5 /* 5 seconds */
|
||||
|
||||
static atomic_t mce_paniced;
|
||||
@ -291,7 +281,6 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
|
||||
if (atomic_inc_return(&mce_fake_paniced) > 1)
|
||||
return;
|
||||
}
|
||||
print_mce_head();
|
||||
/* First print corrected ones that are still unlogged */
|
||||
for (i = 0; i < MCE_LOG_LEN; i++) {
|
||||
struct mce *m = &mcelog.entry[i];
|
||||
@ -322,16 +311,15 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
|
||||
apei_err = apei_write_mce(final);
|
||||
}
|
||||
if (cpu_missing)
|
||||
printk(KERN_EMERG "Some CPUs didn't answer in synchronization\n");
|
||||
print_mce_tail();
|
||||
pr_emerg(HW_ERR "Some CPUs didn't answer in synchronization\n");
|
||||
if (exp)
|
||||
printk(KERN_EMERG "Machine check: %s\n", exp);
|
||||
pr_emerg(HW_ERR "Machine check: %s\n", exp);
|
||||
if (!fake_panic) {
|
||||
if (panic_timeout == 0)
|
||||
panic_timeout = mce_panic_timeout;
|
||||
panic(msg);
|
||||
} else
|
||||
printk(KERN_EMERG "Fake kernel panic: %s\n", msg);
|
||||
pr_emerg(HW_ERR "Fake kernel panic: %s\n", msg);
|
||||
}
|
||||
|
||||
/* Support code for software error injection */
|
||||
@ -1221,7 +1209,7 @@ int mce_notify_irq(void)
|
||||
schedule_work(&mce_trigger_work);
|
||||
|
||||
if (__ratelimit(&ratelimit))
|
||||
printk(KERN_INFO "Machine check events logged\n");
|
||||
pr_info(HW_ERR "Machine check events logged\n");
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
@ -95,19 +95,20 @@ static void cmci_discover(int banks, int boot)
|
||||
rdmsrl(MSR_IA32_MCx_CTL2(i), val);
|
||||
|
||||
/* Already owned by someone else? */
|
||||
if (val & CMCI_EN) {
|
||||
if (val & MCI_CTL2_CMCI_EN) {
|
||||
if (test_and_clear_bit(i, owned) && !boot)
|
||||
print_update("SHD", &hdr, i);
|
||||
__clear_bit(i, __get_cpu_var(mce_poll_banks));
|
||||
continue;
|
||||
}
|
||||
|
||||
val |= CMCI_EN | CMCI_THRESHOLD;
|
||||
val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
|
||||
val |= MCI_CTL2_CMCI_EN | CMCI_THRESHOLD;
|
||||
wrmsrl(MSR_IA32_MCx_CTL2(i), val);
|
||||
rdmsrl(MSR_IA32_MCx_CTL2(i), val);
|
||||
|
||||
/* Did the enable bit stick? -- the bank supports CMCI */
|
||||
if (val & CMCI_EN) {
|
||||
if (val & MCI_CTL2_CMCI_EN) {
|
||||
if (!test_and_set_bit(i, owned) && !boot)
|
||||
print_update("CMCI", &hdr, i);
|
||||
__clear_bit(i, __get_cpu_var(mce_poll_banks));
|
||||
@ -155,7 +156,7 @@ void cmci_clear(void)
|
||||
continue;
|
||||
/* Disable CMCI */
|
||||
rdmsrl(MSR_IA32_MCx_CTL2(i), val);
|
||||
val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK);
|
||||
val &= ~(MCI_CTL2_CMCI_EN|MCI_CTL2_CMCI_THRESHOLD_MASK);
|
||||
wrmsrl(MSR_IA32_MCx_CTL2(i), val);
|
||||
__clear_bit(i, __get_cpu_var(mce_banks_owned));
|
||||
}
|
||||
|
@ -252,6 +252,13 @@ extern struct pid *session_of_pgrp(struct pid *pgrp);
|
||||
#define FW_WARN "[Firmware Warn]: "
|
||||
#define FW_INFO "[Firmware Info]: "
|
||||
|
||||
/*
|
||||
* HW_ERR
|
||||
* Add this to a message for hardware errors, so that user can report
|
||||
* it to hardware vendor instead of LKML or software vendor.
|
||||
*/
|
||||
#define HW_ERR "[Hardware Error]: "
|
||||
|
||||
#ifdef CONFIG_PRINTK
|
||||
asmlinkage int vprintk(const char *fmt, va_list args)
|
||||
__attribute__ ((format (printf, 1, 0)));
|
||||
|
Loading…
Reference in New Issue
Block a user