Merge branch 'apei' into release

This commit is contained in:
Len Brown 2010-08-14 23:55:47 -04:00
commit feb29c5175
8 changed files with 392 additions and 101 deletions

View File

@ -80,7 +80,7 @@ int apei_write_mce(struct mce *m)
rcd.hdr.revision = CPER_RECORD_REV; rcd.hdr.revision = CPER_RECORD_REV;
rcd.hdr.signature_end = CPER_SIG_END; rcd.hdr.signature_end = CPER_SIG_END;
rcd.hdr.section_count = 1; rcd.hdr.section_count = 1;
rcd.hdr.error_severity = CPER_SER_FATAL; rcd.hdr.error_severity = CPER_SEV_FATAL;
/* timestamp, platform_id, partition_id are all invalid */ /* timestamp, platform_id, partition_id are all invalid */
rcd.hdr.validation_bits = 0; rcd.hdr.validation_bits = 0;
rcd.hdr.record_length = sizeof(rcd); rcd.hdr.record_length = sizeof(rcd);
@ -96,7 +96,7 @@ int apei_write_mce(struct mce *m)
rcd.sec_hdr.validation_bits = 0; rcd.sec_hdr.validation_bits = 0;
rcd.sec_hdr.flags = CPER_SEC_PRIMARY; rcd.sec_hdr.flags = CPER_SEC_PRIMARY;
rcd.sec_hdr.section_type = CPER_SECTION_TYPE_MCE; rcd.sec_hdr.section_type = CPER_SECTION_TYPE_MCE;
rcd.sec_hdr.section_severity = CPER_SER_FATAL; rcd.sec_hdr.section_severity = CPER_SEV_FATAL;
memcpy(&rcd.mce, m, sizeof(*m)); memcpy(&rcd.mce, m, sizeof(*m));

View File

@ -28,3 +28,12 @@ config ACPI_APEI_EINJ
EINJ provides a hardware error injection mechanism, it is EINJ provides a hardware error injection mechanism, it is
mainly used for debugging and testing the other parts of mainly used for debugging and testing the other parts of
APEI and some other RAS features. APEI and some other RAS features.
config ACPI_APEI_ERST_DEBUG
tristate "APEI Error Record Serialization Table (ERST) Debug Support"
depends on ACPI_APEI
help
ERST is a way provided by APEI to save and retrieve hardware
error information to and from a persistent store. Enable this
if you want to debug and test the ERST kernel support
and firmware implementation.

View File

@ -1,5 +1,6 @@
obj-$(CONFIG_ACPI_APEI) += apei.o obj-$(CONFIG_ACPI_APEI) += apei.o
obj-$(CONFIG_ACPI_APEI_GHES) += ghes.o obj-$(CONFIG_ACPI_APEI_GHES) += ghes.o
obj-$(CONFIG_ACPI_APEI_EINJ) += einj.o obj-$(CONFIG_ACPI_APEI_EINJ) += einj.o
obj-$(CONFIG_ACPI_APEI_ERST_DEBUG) += erst-dbg.o
apei-y := apei-base.o hest.o cper.o erst.o apei-y := apei-base.o hest.o cper.o erst.o

View File

@ -482,14 +482,14 @@ int apei_resources_request(struct apei_resources *resources,
list_for_each_entry(res, &resources->ioport, list) { list_for_each_entry(res, &resources->ioport, list) {
if (res == res_bak) if (res == res_bak)
break; break;
release_mem_region(res->start, res->end - res->start); release_region(res->start, res->end - res->start);
} }
res_bak = NULL; res_bak = NULL;
err_unmap_iomem: err_unmap_iomem:
list_for_each_entry(res, &resources->iomem, list) { list_for_each_entry(res, &resources->iomem, list) {
if (res == res_bak) if (res == res_bak)
break; break;
release_region(res->start, res->end - res->start); release_mem_region(res->start, res->end - res->start);
} }
return -EINVAL; return -EINVAL;
} }

View File

@ -0,0 +1,207 @@
/*
* APEI Error Record Serialization Table debug support
*
* ERST is a way provided by APEI to save and retrieve hardware error
* information to and from a persistent store. This file provides the
* debugging/testing support for the ERST kernel support and firmware
* implementation.
*
* Copyright 2010 Intel Corp.
* Author: Huang Ying <ying.huang@intel.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version
* 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/uaccess.h>
#include <acpi/apei.h>
#include <linux/miscdevice.h>
#include "apei-internal.h"
/* Prefix prepended to every log message emitted by this file */
#define ERST_DBG_PFX "ERST DBG: "
/* Largest record, in bytes, that read/write will accept */
#define ERST_DBG_RECORD_LEN_MAX 4096
/* Bounce buffer shared by erst_dbg_read/write; grown on demand */
static void *erst_dbg_buf;
/* Size in bytes of the allocation behind erst_dbg_buf */
static unsigned int erst_dbg_buf_len;
/* Prevent erst_dbg_read/write from being invoked concurrently */
static DEFINE_MUTEX(erst_dbg_mutex);
/*
 * Open handler for the ERST debug device.
 *
 * Fails with -ENODEV while erst_disable is set; otherwise hands the
 * open over to nonseekable_open() and returns its result.
 */
static int erst_dbg_open(struct inode *inode, struct file *file)
{
	return erst_disable ? -ENODEV : nonseekable_open(inode, file);
}
/*
 * ioctl handler for the ERST debug device.
 *
 * APEI_ERST_CLEAR_RECORD:     @arg is a user pointer to a u64 record ID;
 *                             the ID is copied in and passed to erst_clear(),
 *                             whose result is returned.
 * APEI_ERST_GET_RECORD_COUNT: @arg is a user pointer to a u32 that receives
 *                             the value of erst_get_record_count().
 *
 * Returns -EFAULT when the record ID cannot be copied from user space,
 * the negative result of erst_get_record_count() or put_user() on
 * failure, and -ENOTTY for any other command.
 */
static long erst_dbg_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
{
	if (cmd == APEI_ERST_CLEAR_RECORD) {
		u64 record_id;

		if (copy_from_user(&record_id, (void __user *)arg,
				   sizeof(record_id)))
			return -EFAULT;
		return erst_clear(record_id);
	}

	if (cmd == APEI_ERST_GET_RECORD_COUNT) {
		u32 record_count;
		int rc = erst_get_record_count();

		if (rc < 0)
			return rc;
		record_count = rc;
		/* put_user() yields 0 on success or a negative errno */
		return put_user(record_count, (u32 __user *)arg);
	}

	return -ENOTTY;
}
/*
 * erst_dbg_read - copy the next ERST error record to user space
 * @filp:  open file (not used)
 * @ubuf:  user buffer that receives the raw record
 * @usize: size of @ubuf in bytes
 * @off:   file offset; must be 0
 *
 * Fetches the next record ID, reads that record into the shared
 * erst_dbg_buf (growing the buffer and retrying when the record does
 * not fit) and copies it to @ubuf.  A record that disappears between
 * the ID lookup and the read (-ENOENT) is skipped.
 *
 * Returns the record length on success, 0 when there are no more
 * records, or a negative errno: -EINVAL for a non-zero offset or a
 * user buffer smaller than the record, -EINTR when interrupted while
 * waiting for erst_dbg_mutex, -EIO for a record longer than
 * ERST_DBG_RECORD_LEN_MAX, -ENOMEM, -EFAULT, or the error reported by
 * erst_get_next_record_id()/erst_read().
 */
static ssize_t erst_dbg_read(struct file *filp, char __user *ubuf,
			     size_t usize, loff_t *off)
{
	int rc;
	ssize_t len = 0;
	u64 id;

	if (*off != 0)
		return -EINVAL;

	if (mutex_lock_interruptible(&erst_dbg_mutex) != 0)
		return -EINTR;

retry_next:
	rc = erst_get_next_record_id(&id);
	if (rc)
		goto out;
	/* no more record */
	if (id == APEI_ERST_INVALID_RECORD_ID) {
		/*
		 * We may arrive here after a -ENOENT retry, in which case
		 * len still holds that error code.  Reset it so that end
		 * of records is reported as 0 (EOF), not as -ENOENT.
		 */
		len = 0;
		goto out;
	}
retry:
	rc = len = erst_read(id, erst_dbg_buf, erst_dbg_buf_len);
	/* The record may be cleared by others, try read next record */
	if (rc == -ENOENT)
		goto retry_next;
	if (rc < 0)
		goto out;
	if (len > ERST_DBG_RECORD_LEN_MAX) {
		pr_warning(ERST_DBG_PFX
			   "Record (ID: 0x%llx) length is too long: %zd\n",
			   id, len);
		rc = -EIO;
		goto out;
	}
	if (len > erst_dbg_buf_len) {
		void *p;

		/*
		 * Allocate the larger buffer before dropping the old one,
		 * so that a failed allocation leaves erst_dbg_buf and
		 * erst_dbg_buf_len consistent for later calls.
		 */
		rc = -ENOMEM;
		p = kmalloc(len, GFP_KERNEL);
		if (!p)
			goto out;
		kfree(erst_dbg_buf);
		erst_dbg_buf = p;
		erst_dbg_buf_len = len;
		goto retry;
	}

	rc = -EINVAL;
	if (len > usize)
		goto out;

	rc = -EFAULT;
	if (copy_to_user(ubuf, erst_dbg_buf, len))
		goto out;
	rc = 0;
out:
	mutex_unlock(&erst_dbg_mutex);
	return rc ? rc : len;
}
/*
 * erst_dbg_write - write one error record from user space to ERST
 * @filp:  open file (not used)
 * @ubuf:  user buffer holding a complete record, starting with a
 *         struct cper_record_header
 * @usize: number of bytes in @ubuf; must equal the header's
 *         record_length field
 * @off:   file offset (not used)
 *
 * Copies the record into the shared erst_dbg_buf (growing it when
 * needed) and hands it to erst_write().
 *
 * Returns @usize on success or a negative errno: -EPERM without
 * CAP_SYS_ADMIN, -EINVAL when @usize exceeds ERST_DBG_RECORD_LEN_MAX,
 * is smaller than the record header, or disagrees with record_length,
 * -EINTR when interrupted while waiting for erst_dbg_mutex, -ENOMEM,
 * -EFAULT, or a negative result from erst_write().
 */
static ssize_t erst_dbg_write(struct file *filp, const char __user *ubuf,
			      size_t usize, loff_t *off)
{
	int rc;
	struct cper_record_header *rcd;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (usize > ERST_DBG_RECORD_LEN_MAX) {
		pr_err(ERST_DBG_PFX "Too long record to be written\n");
		return -EINVAL;
	}
	/*
	 * The header is inspected below, so the write must cover at least
	 * that much.  This also rejects a zero-length write, which would
	 * otherwise leave erst_dbg_buf unallocated (NULL) when it is
	 * dereferenced.
	 */
	if (usize < sizeof(*rcd))
		return -EINVAL;

	if (mutex_lock_interruptible(&erst_dbg_mutex))
		return -EINTR;
	if (usize > erst_dbg_buf_len) {
		void *p;

		/*
		 * Allocate the larger buffer before dropping the old one,
		 * so that a failed allocation leaves erst_dbg_buf and
		 * erst_dbg_buf_len consistent for later calls.
		 */
		rc = -ENOMEM;
		p = kmalloc(usize, GFP_KERNEL);
		if (!p)
			goto out;
		kfree(erst_dbg_buf);
		erst_dbg_buf = p;
		erst_dbg_buf_len = usize;
	}
	if (copy_from_user(erst_dbg_buf, ubuf, usize)) {
		rc = -EFAULT;
		goto out;
	}
	rcd = erst_dbg_buf;
	/* The record must describe exactly the bytes that were written */
	rc = -EINVAL;
	if (rcd->record_length != usize)
		goto out;

	rc = erst_write(erst_dbg_buf);

out:
	mutex_unlock(&erst_dbg_mutex);
	return rc < 0 ? rc : usize;
}
/*
 * File operations of the ERST debug device: read fetches the next
 * stored record, write stores a record, and the ioctl clears a record
 * or reports the record count.
 */
static const struct file_operations erst_dbg_ops = {
.owner = THIS_MODULE,
.open = erst_dbg_open,
.read = erst_dbg_read,
.write = erst_dbg_write,
.unlocked_ioctl = erst_dbg_ioctl,
};
/*
 * Misc device named "erst_dbg", registered with a dynamically
 * assigned minor number and served by erst_dbg_ops.
 */
static struct miscdevice erst_dbg_dev = {
.minor = MISC_DYNAMIC_MINOR,
.name = "erst_dbg",
.fops = &erst_dbg_ops,
};
/*
 * Module entry point: register the erst_dbg misc device and pass the
 * result of misc_register() back to the module loader.
 */
static __init int erst_dbg_init(void)
{
	int rc = misc_register(&erst_dbg_dev);

	return rc;
}
/*
 * Module exit point: deregister the misc device, then release the
 * shared record buffer (kfree() accepts NULL if it was never
 * allocated).
 */
static __exit void erst_dbg_exit(void)
{
misc_deregister(&erst_dbg_dev);
kfree(erst_dbg_buf);
}
/* Hook the init/exit routines into module load and unload */
module_init(erst_dbg_init);
module_exit(erst_dbg_exit);
MODULE_AUTHOR("Huang Ying");
MODULE_DESCRIPTION("APEI Error Record Serialization Table debug support");
MODULE_LICENSE("GPL");

View File

@ -41,6 +41,8 @@
#include <linux/interrupt.h> #include <linux/interrupt.h>
#include <linux/cper.h> #include <linux/cper.h>
#include <linux/kdebug.h> #include <linux/kdebug.h>
#include <linux/platform_device.h>
#include <linux/mutex.h>
#include <acpi/apei.h> #include <acpi/apei.h>
#include <acpi/atomicio.h> #include <acpi/atomicio.h>
#include <acpi/hed.h> #include <acpi/hed.h>
@ -87,6 +89,7 @@ struct ghes {
* used for that. * used for that.
*/ */
static LIST_HEAD(ghes_sci); static LIST_HEAD(ghes_sci);
static DEFINE_MUTEX(ghes_list_mutex);
static struct ghes *ghes_new(struct acpi_hest_generic *generic) static struct ghes *ghes_new(struct acpi_hest_generic *generic)
{ {
@ -132,26 +135,26 @@ static void ghes_fini(struct ghes *ghes)
} }
enum { enum {
GHES_SER_NO = 0x0, GHES_SEV_NO = 0x0,
GHES_SER_CORRECTED = 0x1, GHES_SEV_CORRECTED = 0x1,
GHES_SER_RECOVERABLE = 0x2, GHES_SEV_RECOVERABLE = 0x2,
GHES_SER_PANIC = 0x3, GHES_SEV_PANIC = 0x3,
}; };
static inline int ghes_severity(int severity) static inline int ghes_severity(int severity)
{ {
switch (severity) { switch (severity) {
case CPER_SER_INFORMATIONAL: case CPER_SEV_INFORMATIONAL:
return GHES_SER_NO; return GHES_SEV_NO;
case CPER_SER_CORRECTED: case CPER_SEV_CORRECTED:
return GHES_SER_CORRECTED; return GHES_SEV_CORRECTED;
case CPER_SER_RECOVERABLE: case CPER_SEV_RECOVERABLE:
return GHES_SER_RECOVERABLE; return GHES_SEV_RECOVERABLE;
case CPER_SER_FATAL: case CPER_SEV_FATAL:
return GHES_SER_PANIC; return GHES_SEV_PANIC;
default: default:
/* Unkown, go panic */ /* Unkown, go panic */
return GHES_SER_PANIC; return GHES_SEV_PANIC;
} }
} }
@ -237,16 +240,16 @@ static void ghes_clear_estatus(struct ghes *ghes)
static void ghes_do_proc(struct ghes *ghes) static void ghes_do_proc(struct ghes *ghes)
{ {
int ser, processed = 0; int sev, processed = 0;
struct acpi_hest_generic_data *gdata; struct acpi_hest_generic_data *gdata;
ser = ghes_severity(ghes->estatus->error_severity); sev = ghes_severity(ghes->estatus->error_severity);
apei_estatus_for_each_section(ghes->estatus, gdata) { apei_estatus_for_each_section(ghes->estatus, gdata) {
#ifdef CONFIG_X86_MCE #ifdef CONFIG_X86_MCE
if (!uuid_le_cmp(*(uuid_le *)gdata->section_type, if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
CPER_SEC_PLATFORM_MEM)) { CPER_SEC_PLATFORM_MEM)) {
apei_mce_report_mem_error( apei_mce_report_mem_error(
ser == GHES_SER_CORRECTED, sev == GHES_SEV_CORRECTED,
(struct cper_sec_mem_err *)(gdata+1)); (struct cper_sec_mem_err *)(gdata+1));
processed = 1; processed = 1;
} }
@ -293,18 +296,15 @@ static struct notifier_block ghes_notifier_sci = {
.notifier_call = ghes_notify_sci, .notifier_call = ghes_notify_sci,
}; };
static int hest_ghes_parse(struct acpi_hest_header *hest_hdr, void *data) static int __devinit ghes_probe(struct platform_device *ghes_dev)
{ {
struct acpi_hest_generic *generic; struct acpi_hest_generic *generic;
struct ghes *ghes = NULL; struct ghes *ghes = NULL;
int rc = 0; int rc = -EINVAL;
if (hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR) generic = ghes_dev->dev.platform_data;
return 0;
generic = (struct acpi_hest_generic *)hest_hdr;
if (!generic->enabled) if (!generic->enabled)
return 0; return -ENODEV;
if (generic->error_block_length < if (generic->error_block_length <
sizeof(struct acpi_hest_generic_status)) { sizeof(struct acpi_hest_generic_status)) {
@ -327,62 +327,91 @@ static int hest_ghes_parse(struct acpi_hest_header *hest_hdr, void *data)
ghes = NULL; ghes = NULL;
goto err; goto err;
} }
switch (generic->notify.type) { if (generic->notify.type == ACPI_HEST_NOTIFY_SCI) {
case ACPI_HEST_NOTIFY_POLLED: mutex_lock(&ghes_list_mutex);
pr_warning(GHES_PFX
"Generic hardware error source: %d notified via POLL is not supported!\n",
generic->header.source_id);
break;
case ACPI_HEST_NOTIFY_EXTERNAL:
case ACPI_HEST_NOTIFY_LOCAL:
pr_warning(GHES_PFX
"Generic hardware error source: %d notified via IRQ is not supported!\n",
generic->header.source_id);
break;
case ACPI_HEST_NOTIFY_SCI:
if (list_empty(&ghes_sci)) if (list_empty(&ghes_sci))
register_acpi_hed_notifier(&ghes_notifier_sci); register_acpi_hed_notifier(&ghes_notifier_sci);
list_add_rcu(&ghes->list, &ghes_sci); list_add_rcu(&ghes->list, &ghes_sci);
break; mutex_unlock(&ghes_list_mutex);
case ACPI_HEST_NOTIFY_NMI: } else {
pr_warning(GHES_PFX unsigned char *notify = NULL;
"Generic hardware error source: %d notified via NMI is not supported!\n",
generic->header.source_id); switch (generic->notify.type) {
break; case ACPI_HEST_NOTIFY_POLLED:
default: notify = "POLL";
pr_warning(FW_WARN GHES_PFX break;
"Unknown notification type: %u for generic hardware error source: %d\n", case ACPI_HEST_NOTIFY_EXTERNAL:
generic->notify.type, generic->header.source_id); case ACPI_HEST_NOTIFY_LOCAL:
break; notify = "IRQ";
break;
case ACPI_HEST_NOTIFY_NMI:
notify = "NMI";
break;
}
if (notify) {
pr_warning(GHES_PFX
"Generic hardware error source: %d notified via %s is not supported!\n",
generic->header.source_id, notify);
} else {
pr_warning(FW_WARN GHES_PFX
"Unknown notification type: %u for generic hardware error source: %d\n",
generic->notify.type, generic->header.source_id);
}
rc = -ENODEV;
goto err;
} }
platform_set_drvdata(ghes_dev, ghes);
return 0; return 0;
err: err:
if (ghes) if (ghes) {
ghes_fini(ghes);
return rc;
}
static void ghes_cleanup(void)
{
struct ghes *ghes, *nghes;
if (!list_empty(&ghes_sci))
unregister_acpi_hed_notifier(&ghes_notifier_sci);
synchronize_rcu();
list_for_each_entry_safe(ghes, nghes, &ghes_sci, list) {
list_del(&ghes->list);
ghes_fini(ghes); ghes_fini(ghes);
kfree(ghes); kfree(ghes);
} }
return rc;
} }
/*
 * Platform driver remove callback for a GHES device.
 *
 * Takes the struct ghes stored as driver data off the ghes_sci list,
 * unregisters the HED notifier once that list becomes empty, waits for
 * an RCU grace period and then tears down and frees the ghes.  Only
 * the ACPI_HEST_NOTIFY_SCI notification type is handled; any other
 * type triggers BUG().
 *
 * Always returns 0.
 */
static int __devexit ghes_remove(struct platform_device *ghes_dev)
{
	struct ghes *ghes = platform_get_drvdata(ghes_dev);
	struct acpi_hest_generic *generic = ghes->generic;

	if (generic->notify.type != ACPI_HEST_NOTIFY_SCI)
		BUG();

	mutex_lock(&ghes_list_mutex);
	list_del_rcu(&ghes->list);
	if (list_empty(&ghes_sci))
		unregister_acpi_hed_notifier(&ghes_notifier_sci);
	mutex_unlock(&ghes_list_mutex);

	/* Wait for an RCU grace period before freeing the list entry */
	synchronize_rcu();
	ghes_fini(ghes);
	kfree(ghes);

	platform_set_drvdata(ghes_dev, NULL);

	return 0;
}
/*
 * Platform driver bound to the "GHES" platform devices.
 *
 * ghes_remove() is marked __devexit, so its address is wrapped in
 * __devexit_p(): when the exit section is discarded the pointer
 * becomes NULL instead of referring to code that no longer exists.
 */
static struct platform_driver ghes_platform_driver = {
	.driver		= {
		.name	= "GHES",
		.owner	= THIS_MODULE,
	},
	.probe		= ghes_probe,
	.remove		= __devexit_p(ghes_remove),
};
static int __init ghes_init(void) static int __init ghes_init(void)
{ {
int rc;
if (acpi_disabled) if (acpi_disabled)
return -ENODEV; return -ENODEV;
@ -391,32 +420,12 @@ static int __init ghes_init(void)
return -EINVAL; return -EINVAL;
} }
rc = apei_hest_parse(hest_ghes_parse, NULL); return platform_driver_register(&ghes_platform_driver);
if (rc) {
pr_err(GHES_PFX
"Error during parsing HEST generic hardware error sources.\n");
goto err_cleanup;
}
if (list_empty(&ghes_sci)) {
pr_info(GHES_PFX
"No functional generic hardware error sources.\n");
rc = -ENODEV;
goto err_cleanup;
}
pr_info(GHES_PFX
"Generic Hardware Error Source support is initialized.\n");
return 0;
err_cleanup:
ghes_cleanup();
return rc;
} }
static void __exit ghes_exit(void) static void __exit ghes_exit(void)
{ {
ghes_cleanup(); platform_driver_unregister(&ghes_platform_driver);
} }
module_init(ghes_init); module_init(ghes_init);
@ -425,3 +434,4 @@ module_exit(ghes_exit);
MODULE_AUTHOR("Huang Ying"); MODULE_AUTHOR("Huang Ying");
MODULE_DESCRIPTION("APEI Generic Hardware Error Source support"); MODULE_DESCRIPTION("APEI Generic Hardware Error Source support");
MODULE_LICENSE("GPL"); MODULE_LICENSE("GPL");
MODULE_ALIAS("platform:GHES");

View File

@ -34,6 +34,7 @@
#include <linux/kdebug.h> #include <linux/kdebug.h>
#include <linux/highmem.h> #include <linux/highmem.h>
#include <linux/io.h> #include <linux/io.h>
#include <linux/platform_device.h>
#include <acpi/apei.h> #include <acpi/apei.h>
#include "apei-internal.h" #include "apei-internal.h"
@ -47,11 +48,6 @@ EXPORT_SYMBOL_GPL(hest_disable);
static struct acpi_table_hest *hest_tab; static struct acpi_table_hest *hest_tab;
static int hest_void_parse(struct acpi_hest_header *hest_hdr, void *data)
{
return 0;
}
static int hest_esrc_len_tab[ACPI_HEST_TYPE_RESERVED] = { static int hest_esrc_len_tab[ACPI_HEST_TYPE_RESERVED] = {
[ACPI_HEST_TYPE_IA32_CHECK] = -1, /* need further calculation */ [ACPI_HEST_TYPE_IA32_CHECK] = -1, /* need further calculation */
[ACPI_HEST_TYPE_IA32_CORRECTED_CHECK] = -1, [ACPI_HEST_TYPE_IA32_CORRECTED_CHECK] = -1,
@ -125,6 +121,69 @@ int apei_hest_parse(apei_hest_func_t func, void *data)
} }
EXPORT_SYMBOL_GPL(apei_hest_parse); EXPORT_SYMBOL_GPL(apei_hest_parse);
/*
 * Bookkeeping for the GHES platform devices created while parsing
 * HEST, so they can be unregistered again if parsing fails part-way.
 */
struct ghes_arr {
/* array of the devices registered so far */
struct platform_device **ghes_devs;
/* number of valid entries in ghes_devs */
unsigned int count;
};
/*
 * apei_hest_parse() callback that counts generic hardware error
 * sources.
 *
 * @hest_hdr: header of the HEST entry being visited
 * @data:     pointer to the caller's unsigned int counter, incremented
 *            for every entry of type ACPI_HEST_TYPE_GENERIC_ERROR
 *
 * Always returns 0.
 */
static int hest_parse_ghes_count(struct acpi_hest_header *hest_hdr, void *data)
{
	/* The caller's counter is an unsigned int; access it as such */
	unsigned int *count = data;

	if (hest_hdr->type == ACPI_HEST_TYPE_GENERIC_ERROR)
		(*count)++;
	return 0;
}
static int hest_parse_ghes(struct acpi_hest_header *hest_hdr, void *data)
{
struct acpi_hest_generic *generic;
struct platform_device *ghes_dev;
struct ghes_arr *ghes_arr = data;
int rc;
if (hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR)
return 0;
generic = (struct acpi_hest_generic *)hest_hdr;
if (!generic->enabled)
return 0;
ghes_dev = platform_device_alloc("GHES", hest_hdr->source_id);
if (!ghes_dev)
return -ENOMEM;
ghes_dev->dev.platform_data = generic;
rc = platform_device_add(ghes_dev);
if (rc)
goto err;
ghes_arr->ghes_devs[ghes_arr->count++] = ghes_dev;
return 0;
err:
platform_device_put(ghes_dev);
return rc;
}
static int hest_ghes_dev_register(unsigned int ghes_count)
{
int rc, i;
struct ghes_arr ghes_arr;
ghes_arr.count = 0;
ghes_arr.ghes_devs = kmalloc(sizeof(void *) * ghes_count, GFP_KERNEL);
if (!ghes_arr.ghes_devs)
return -ENOMEM;
rc = apei_hest_parse(hest_parse_ghes, &ghes_arr);
if (rc)
goto err;
out:
kfree(ghes_arr.ghes_devs);
return rc;
err:
for (i = 0; i < ghes_arr.count; i++)
platform_device_unregister(ghes_arr.ghes_devs[i]);
goto out;
}
static int __init setup_hest_disable(char *str) static int __init setup_hest_disable(char *str)
{ {
hest_disable = 1; hest_disable = 1;
@ -137,6 +196,7 @@ static int __init hest_init(void)
{ {
acpi_status status; acpi_status status;
int rc = -ENODEV; int rc = -ENODEV;
unsigned int ghes_count = 0;
if (acpi_disabled) if (acpi_disabled)
goto err; goto err;
@ -158,7 +218,11 @@ static int __init hest_init(void)
goto err; goto err;
} }
rc = apei_hest_parse(hest_void_parse, NULL); rc = apei_hest_parse(hest_parse_ghes_count, &ghes_count);
if (rc)
goto err;
rc = hest_ghes_dev_register(ghes_count);
if (rc) if (rc)
goto err; goto err;

View File

@ -39,10 +39,10 @@
* Severity difinition for error_severity in struct cper_record_header * Severity difinition for error_severity in struct cper_record_header
* and section_severity in struct cper_section_descriptor * and section_severity in struct cper_section_descriptor
*/ */
#define CPER_SER_RECOVERABLE 0x0 #define CPER_SEV_RECOVERABLE 0x0
#define CPER_SER_FATAL 0x1 #define CPER_SEV_FATAL 0x1
#define CPER_SER_CORRECTED 0x2 #define CPER_SEV_CORRECTED 0x2
#define CPER_SER_INFORMATIONAL 0x3 #define CPER_SEV_INFORMATIONAL 0x3
/* /*
* Validation bits difinition for validation_bits in struct * Validation bits difinition for validation_bits in struct