All of lore.kernel.org
 help / color / mirror / Atom feed
From: Huang Ying <ying.huang@intel.com>
To: Len Brown <lenb@kernel.org>
Cc: linux-kernel@vger.kernel.org, Andi Kleen <andi@firstfloor.org>,
	ying.huang@intel.com, linux-acpi@vger.kernel.org
Subject: [PATCH -v2 8/9] ACPI, APEI, Report GHES error record with hardware error device core
Date: Mon, 25 Oct 2010 15:43:29 +0800	[thread overview]
Message-ID: <1287992610-14996-9-git-send-email-ying.huang@intel.com> (raw)
In-Reply-To: <1287992610-14996-1-git-send-email-ying.huang@intel.com>

One hardware error device (struct herr_dev) is created for each GHES
in the GHES platform device "probe" function.  Then, when the GHES
hardware error handler is notified by firmware, the hardware error
records are reported on that struct herr_dev.

In the previous GHES support, only corrected memory errors could be
reported to user space via /dev/mcelog.  Now all kinds of hardware
errors notified via SCI can be reported.

Signed-off-by: Huang Ying <ying.huang@intel.com>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
---
 drivers/acpi/apei/cper.c |   18 +++++++
 drivers/acpi/apei/ghes.c |  119 +++++++++++++++++++++++++++++++----------------
 include/linux/cper.h     |    2 
 3 files changed, 99 insertions(+), 40 deletions(-)

--- a/drivers/acpi/apei/cper.c
+++ b/drivers/acpi/apei/cper.c
@@ -49,6 +49,24 @@ int herr_severity_to_cper(int herr_sever
 }
 EXPORT_SYMBOL_GPL(herr_severity_to_cper);
 
+int cper_severity_to_herr(int cper_severity)
+{
+	switch (cper_severity) {
+	case CPER_SEV_INFORMATIONAL:
+		return HERR_SEV_NONE;
+	case CPER_SEV_CORRECTED:
+		return HERR_SEV_CORRECTED;
+	case CPER_SEV_RECOVERABLE:
+		return HERR_SEV_RECOVERABLE;
+	case CPER_SEV_FATAL:
+		return HERR_SEV_FATAL;
+	default:
+		/* Unknown, default to fatal */
+		return HERR_SEV_FATAL;
+	}
+}
+EXPORT_SYMBOL_GPL(cper_severity_to_herr);
+
 /*
  * CPER record ID need to be unique even after reboot, because record
  * ID is used as index for ERST storage, while CPER records from
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -43,6 +43,7 @@
 #include <linux/kdebug.h>
 #include <linux/platform_device.h>
 #include <linux/mutex.h>
+#include <linux/herror.h>
 #include <acpi/apei.h>
 #include <acpi/atomicio.h>
 #include <acpi/hed.h>
@@ -74,6 +75,7 @@ struct ghes {
 	struct list_head list;
 	u64 buffer_paddr;
 	unsigned long flags;
+	struct herr_dev *herr_dev;
 };
 
 /*
@@ -238,9 +240,38 @@ static void ghes_clear_estatus(struct gh
 	ghes->flags &= ~GHES_TO_CLEAR;
 }
 
+static void ghes_report(struct ghes *ghes)
+{
+	struct herr_record *ercd;
+	struct herr_section *esec;
+	struct acpi_hest_generic_status *estatus;
+	unsigned int estatus_len, ercd_alloc_flags = 0;
+	int ghes_sev;
+
+	ghes_sev = ghes_severity(ghes->estatus->error_severity);
+	if (ghes_sev >= GHES_SEV_PANIC)
+		ercd_alloc_flags |= HERR_ALLOC_NO_BURST_CONTROL;
+	estatus_len = apei_estatus_len(ghes->estatus);
+	ercd = herr_record_alloc(HERR_RECORD_LEN_ROUND1(estatus_len),
+				 ghes->herr_dev, ercd_alloc_flags);
+	if (!ercd)
+		return;
+
+	ercd->severity = cper_severity_to_herr(ghes->estatus->error_severity);
+
+	esec = herr_first_sec(ercd);
+	esec->length = HERR_SEC_LEN_ROUND(estatus_len);
+	esec->flags = 0;
+	esec->type = HERR_TYPE_GESR;
+
+	estatus = herr_sec_data(esec);
+	memcpy(estatus, ghes->estatus, estatus_len);
+	herr_record_report(ercd, ghes->herr_dev);
+}
+
 static void ghes_do_proc(struct ghes *ghes)
 {
-	int sev, processed = 0;
+	int sev;
 	struct acpi_hest_generic_data *gdata;
 
 	sev = ghes_severity(ghes->estatus->error_severity);
@@ -251,15 +282,9 @@ static void ghes_do_proc(struct ghes *gh
 			apei_mce_report_mem_error(
 				sev == GHES_SEV_CORRECTED,
 				(struct cper_sec_mem_err *)(gdata+1));
-			processed = 1;
 		}
 #endif
 	}
-
-	if (!processed && printk_ratelimit())
-		pr_warning(GHES_PFX
-		"Unknown error record from generic hardware error source: %d\n",
-			   ghes->generic->header.source_id);
 }
 
 static int ghes_proc(struct ghes *ghes)
@@ -269,7 +294,9 @@ static int ghes_proc(struct ghes *ghes)
 	rc = ghes_read_estatus(ghes, 0);
 	if (rc)
 		goto out;
+	ghes_report(ghes);
 	ghes_do_proc(ghes);
+	herr_notify();
 
 out:
 	ghes_clear_estatus(ghes);
@@ -300,41 +327,15 @@ static int __devinit ghes_probe(struct p
 {
 	struct acpi_hest_generic *generic;
 	struct ghes *ghes = NULL;
-	int rc = -EINVAL;
+	int rc;
 
+	rc = -ENODEV;
 	generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data;
 	if (!generic->enabled)
-		return -ENODEV;
-
-	if (generic->error_block_length <
-	    sizeof(struct acpi_hest_generic_status)) {
-		pr_warning(FW_BUG GHES_PFX
-"Invalid error block length: %u for generic hardware error source: %d\n",
-			   generic->error_block_length,
-			   generic->header.source_id);
 		goto err;
-	}
-	if (generic->records_to_preallocate == 0) {
-		pr_warning(FW_BUG GHES_PFX
-"Invalid records to preallocate: %u for generic hardware error source: %d\n",
-			   generic->records_to_preallocate,
-			   generic->header.source_id);
-		goto err;
-	}
-	ghes = ghes_new(generic);
-	if (IS_ERR(ghes)) {
-		rc = PTR_ERR(ghes);
-		ghes = NULL;
-		goto err;
-	}
-	if (generic->notify.type == ACPI_HEST_NOTIFY_SCI) {
-		mutex_lock(&ghes_list_mutex);
-		if (list_empty(&ghes_sci))
-			register_acpi_hed_notifier(&ghes_notifier_sci);
-		list_add_rcu(&ghes->list, &ghes_sci);
-		mutex_unlock(&ghes_list_mutex);
-	} else {
-		unsigned char *notify = NULL;
+
+	if (generic->notify.type != ACPI_HEST_NOTIFY_SCI) {
+		char *notify = NULL;
 
 		switch (generic->notify.type) {
 		case ACPI_HEST_NOTIFY_POLLED:
@@ -357,9 +358,46 @@ static int __devinit ghes_probe(struct p
 "Unknown notification type: %u for generic hardware error source: %d\n",
 			generic->notify.type, generic->header.source_id);
 		}
-		rc = -ENODEV;
 		goto err;
 	}
+
+	rc = -EIO;
+	if (generic->error_block_length <
+	    sizeof(struct acpi_hest_generic_status)) {
+		pr_warning(FW_BUG GHES_PFX
+"Invalid error block length: %u for generic hardware error source: %d\n",
+			   generic->error_block_length,
+			   generic->header.source_id);
+		goto err;
+	}
+	ghes = ghes_new(generic);
+	if (IS_ERR(ghes)) {
+		rc = PTR_ERR(ghes);
+		ghes = NULL;
+		goto err;
+	}
+	rc = -ENOMEM;
+	ghes->herr_dev = herr_dev_alloc();
+	if (!ghes->herr_dev)
+		goto err;
+	ghes->herr_dev->name = dev_name(&ghes_dev->dev);
+	ghes->herr_dev->dev.parent = &ghes_dev->dev;
+	rc = herr_dev_register(ghes->herr_dev);
+	if (rc) {
+		herr_dev_free(ghes->herr_dev);
+		goto err;
+	}
+	switch (generic->notify.type) {
+	case ACPI_HEST_NOTIFY_SCI:
+		mutex_lock(&ghes_list_mutex);
+		if (list_empty(&ghes_sci))
+			register_acpi_hed_notifier(&ghes_notifier_sci);
+		list_add_rcu(&ghes->list, &ghes_sci);
+		mutex_unlock(&ghes_list_mutex);
+		break;
+	default:
+		BUG();
+	}
 	platform_set_drvdata(ghes_dev, ghes);
 
 	return 0;
@@ -386,13 +424,14 @@ static int __devexit ghes_remove(struct
 		if (list_empty(&ghes_sci))
 			unregister_acpi_hed_notifier(&ghes_notifier_sci);
 		mutex_unlock(&ghes_list_mutex);
+		synchronize_rcu();
 		break;
 	default:
 		BUG();
 		break;
 	}
 
-	synchronize_rcu();
+	herr_dev_unregister(ghes->herr_dev);
 	ghes_fini(ghes);
 	kfree(ghes);
 
--- a/include/linux/cper.h
+++ b/include/linux/cper.h
@@ -22,6 +22,7 @@
 #define LINUX_CPER_H
 
 #include <linux/uuid.h>
+#include <linux/herror_record.h>
 
 /* CPER record signature and the size */
 #define CPER_SIG_RECORD				"CPER"
@@ -310,6 +311,7 @@ struct cper_sec_mem_err {
 #pragma pack()
 
 int herr_severity_to_cper(int herr_severity);
+int cper_severity_to_herr(int cper_severity);
 u64 cper_next_record_id(void);
 
 #endif

  parent reply	other threads:[~2010-10-25  7:43 UTC|newest]

Thread overview: 48+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-10-25  7:43 [PATCH -v2 0/9] ACPI, APEI patches for 2.6.37 Huang Ying
2010-10-25  7:43 ` [PATCH -v2 1/9] ACPI, APEI, Add ERST record ID cache Huang Ying
2010-10-25  7:43 ` [PATCH -v2 2/9] Add lock-less version of bitmap_set/clear Huang Ying
2010-10-25  7:43 ` [PATCH -v2 3/9] lock-less NULL terminated single list implementation Huang Ying
2010-10-25  7:43 ` [PATCH -v2 4/9] lock-less general memory allocator Huang Ying
2010-10-25  7:43 ` [PATCH -v2 5/9] Hardware error device core Huang Ying
2010-10-25  7:43 ` [PATCH -v2 6/9] Hardware error record persistent support Huang Ying
2010-10-25  7:43 ` [PATCH -v2 7/9] ACPI, APEI, Use ERST for hardware error persisting before panic Huang Ying
2010-10-25  7:43 ` Huang Ying [this message]
2010-10-25  7:43 ` [PATCH -v2 9/9] ACPI, APEI, Generic Hardware Error Source POLL/IRQ/NMI notification type support Huang Ying
2010-10-25  8:45   ` [NAK] " Ingo Molnar
2010-10-25  8:58     ` Huang Ying
2010-10-25  9:19       ` Andi Kleen
2010-10-25 11:15         ` Ingo Molnar
2010-10-25 12:04           ` Mauro Carvalho Chehab
2010-10-25 17:07             ` Tony Luck
2010-10-25 17:19               ` Mauro Carvalho Chehab
2010-10-25 12:37           ` Andi Kleen
2010-10-25 12:55             ` Ingo Molnar
2010-10-25 13:02               ` Ingo Molnar
2010-10-25 13:11               ` Andi Kleen
2010-10-25 13:47                 ` Ingo Molnar
2010-10-25 15:14                   ` Andi Kleen
2010-10-25 17:10                     ` Ingo Molnar
2010-10-27  8:25                       ` Ingo Molnar
2010-10-25 16:38         ` Thomas Gleixner
2010-10-25  9:25       ` Ingo Molnar
2010-10-25 17:14         ` Tony Luck
2010-10-25 20:23           ` Borislav Petkov
2010-10-25 21:23             ` Tony Luck
2010-10-25 21:23               ` Tony Luck
2010-10-25 21:51               ` Borislav Petkov
2010-10-25 21:51                 ` Borislav Petkov
2010-10-25 23:35                 ` Tony Luck
2010-10-26  6:26                   ` Borislav Petkov
2010-10-26  6:26                     ` Borislav Petkov
2010-10-25 23:35                 ` Tony Luck
2010-10-26  1:06     ` Len Brown
2010-10-26  4:53       ` Thomas Gleixner
2010-10-26  7:22         ` Ingo Molnar
2010-10-26  7:30           ` Huang Ying
2010-10-26  7:55             ` Ingo Molnar
2010-10-26  8:32               ` Huang Ying
2010-10-26 10:03                 ` Ingo Molnar
2010-10-26  8:38         ` Andi Kleen
2010-10-26 10:00           ` Thomas Gleixner
2010-10-26  8:52         ` Huang Ying
2010-10-26 10:15           ` Ingo Molnar

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1287992610-14996-9-git-send-email-ying.huang@intel.com \
    --to=ying.huang@intel.com \
    --cc=andi@firstfloor.org \
    --cc=lenb@kernel.org \
    --cc=linux-acpi@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.