Linux-EDAC Archive on lore.kernel.org
 help / color / Atom feed
From: Shiju Jose <shiju.jose@huawei.com>
To: <linux-acpi@vger.kernel.org>, <linux-edac@vger.kernel.org>,
	<linux-kernel@vger.kernel.org>, <rjw@rjwysocki.net>,
	<lenb@kernel.org>, <james.morse@arm.com>, <tony.luck@intel.com>,
	<bp@alien8.de>, <baicar@os.amperecomputing.com>
Cc: <linuxarm@huawei.com>, <jonathan.cameron@huawei.com>,
	<tanxiaofei@huawei.com>, Shiju Jose <shiju.jose@huawei.com>
Subject: [PATCH RFC 1/4] ACPI: APEI: Add support to notify the vendor specific HW errors
Date: Mon, 12 Aug 2019 11:11:46 +0100
Message-ID: <20190812101149.26036-2-shiju.jose@huawei.com> (raw)
In-Reply-To: <20190812101149.26036-1-shiju.jose@huawei.com>

Presently, vendor-specific HW errors reported in the non-standard format
are not passed on to the vendor drivers for recovery.

Add support for notifying the registered kernel drivers of these
vendor-specific HW errors.

Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
---
 drivers/acpi/apei/ghes.c | 118 +++++++++++++++++++++++++++++++++++++++++++++--
 include/acpi/ghes.h      |  47 +++++++++++++++++++
 2 files changed, 160 insertions(+), 5 deletions(-)

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index a66e00f..374d197 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -477,6 +477,77 @@ static void ghes_handle_aer(struct acpi_hest_generic_data *gdata)
 #endif
 }
 
+struct ghes_error_notify {
+	struct list_head list; /* node in ghes_error_notify_list */
+	struct rcu_head	rcu_head; /* used by kfree_rcu() to defer the free */
+	guid_t sec_type; /* section type GUID this handler is matched on */
+	error_handle handle; /* error handler function */
+	void *data; /* handler driver's private data if any */
+};
+
+/* Registered error handlers: writers take the mutex, readers use RCU */
+static DEFINE_MUTEX(ghes_error_notify_mutex);
+static LIST_HEAD(ghes_error_notify_list);
+static refcount_t ghes_ref_count; /* GHES devices currently probed */
+
+/**
+ * ghes_error_notify_register - register a handler for vendor specific
+ * HW errors; may sleep (GFP_KERNEL allocation and mutex).
+ * @sec_type: section type GUID of the CPER records to be notified.
+ * @handle: pointer to the error handling function.
+ * @data: handler driver's private data, passed back to @handle.
+ *
+ * Return: 0 on success, -ENOMEM if the list entry cannot be allocated.
+ */
+int ghes_error_notify_register(guid_t sec_type, error_handle handle, void *data)
+{
+	struct ghes_error_notify *err_notify;
+
+	/* Allocate before locking so the -ENOMEM path holds no mutex. */
+	err_notify = kzalloc(sizeof(*err_notify), GFP_KERNEL);
+	if (!err_notify)
+		return -ENOMEM;
+
+	err_notify->handle = handle;
+	guid_copy(&err_notify->sec_type, &sec_type);
+	err_notify->data = data;
+	mutex_lock(&ghes_error_notify_mutex);
+	list_add_rcu(&err_notify->list, &ghes_error_notify_list);
+	mutex_unlock(&ghes_error_notify_mutex);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ghes_error_notify_register);
+
+/**
+ * ghes_error_notify_unregister - unregister an error handling function.
+ * @sec_type: section type GUID the handler was registered with.
+ * @handle: pointer to the error handling function to remove.
+ *
+ * May sleep. A pair that was never registered is silently ignored.
+ */
+void ghes_error_notify_unregister(guid_t sec_type, error_handle handle)
+{
+	struct ghes_error_notify *err_notify;
+	bool found = false;
+
+	mutex_lock(&ghes_error_notify_mutex);
+	list_for_each_entry(err_notify, &ghes_error_notify_list, list) {
+		if (guid_equal(&err_notify->sec_type, &sec_type) &&
+		    err_notify->handle == handle) {
+			list_del_rcu(&err_notify->list);
+			found = true;
+			break;
+		}
+	}
+	mutex_unlock(&ghes_error_notify_mutex);
+	if (!found)
+		return;
+	/* RCU readers may still be running the handler; wait, then free. */
+	synchronize_rcu();
+	kfree(err_notify);
+}
+EXPORT_SYMBOL_GPL(ghes_error_notify_unregister);
+
 static void ghes_do_proc(struct ghes *ghes,
 			 const struct acpi_hest_generic_status *estatus)
 {
@@ -485,6 +556,8 @@ static void ghes_do_proc(struct ghes *ghes,
 	guid_t *sec_type;
 	guid_t *fru_id = &NULL_UUID_LE;
 	char *fru_text = "";
+	bool is_notify = 0;
+	struct ghes_error_notify *err_notify;
 
 	sev = ghes_severity(estatus->error_severity);
 	apei_estatus_for_each_section(estatus, gdata) {
@@ -512,11 +585,29 @@ static void ghes_do_proc(struct ghes *ghes,
 
 			log_arm_hw_error(err);
 		} else {
-			void *err = acpi_hest_get_payload(gdata);
-
-			log_non_standard_event(sec_type, fru_id, fru_text,
-					       sec_sev, err,
-					       gdata->error_data_length);
+			is_notify = false; /* reset for each section */
+			rcu_read_lock();
+			list_for_each_entry_rcu(err_notify,
+						&ghes_error_notify_list, list) {
+				if (guid_equal(&err_notify->sec_type,
+					       sec_type)) {
+					/* Runs in interrupt context under
+					 * rcu_read_lock(): must not sleep.
+					 */
+					err_notify->handle(gdata, sev,
+							   err_notify->data);
+					is_notify = true;
+				}
+			}
+			rcu_read_unlock();
+
+			if (!is_notify) {
+				void *err = acpi_hest_get_payload(gdata);
+
+				log_non_standard_event(sec_type, fru_id,
+						       fru_text, sec_sev, err,
+						       gdata->error_data_length);
+			}
 		}
 	}
 }
@@ -1217,6 +1308,11 @@ static int ghes_probe(struct platform_device *ghes_dev)
 
 	ghes_edac_register(ghes, &ghes_dev->dev);
 
+	if (!refcount_read(&ghes_ref_count))
+		refcount_set(&ghes_ref_count, 1);
+	else
+		refcount_inc(&ghes_ref_count);
+
 	/* Handle any pending errors right away */
 	spin_lock_irqsave(&ghes_notify_lock_irq, flags);
 	ghes_proc(ghes);
@@ -1237,6 +1333,7 @@ static int ghes_remove(struct platform_device *ghes_dev)
 	int rc;
 	struct ghes *ghes;
 	struct acpi_hest_generic *generic;
+	struct ghes_error_notify *err_notify, *tmp;
 
 	ghes = platform_get_drvdata(ghes_dev);
 	generic = ghes->generic;
@@ -1279,6 +1376,17 @@ static int ghes_remove(struct platform_device *ghes_dev)
 
 	ghes_fini(ghes);
 
+	if (refcount_dec_and_test(&ghes_ref_count) &&
+	    !list_empty(&ghes_error_notify_list)) {
+		mutex_lock(&ghes_error_notify_mutex);
+		list_for_each_entry_safe(err_notify, tmp,
+					 &ghes_error_notify_list, list) {
+			list_del_rcu(&err_notify->list);
+			kfree_rcu(err_notify, rcu_head);
+		}
+		mutex_unlock(&ghes_error_notify_mutex);
+	}
+
 	ghes_edac_unregister(ghes);
 
 	kfree(ghes);
diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h
index e3f1cdd..d480537 100644
--- a/include/acpi/ghes.h
+++ b/include/acpi/ghes.h
@@ -50,6 +50,53 @@ enum {
 	GHES_SEV_PANIC = 0x3,
 };
 
+/**
+ * error_handle - error handling function for the hw errors.
+ * This handler is called in interrupt context and must not sleep.
+ * @gdata: generic data entry of the CPER section being reported.
+ * @sev: severity of the whole error event, as computed by
+ * ghes_severity() from the generic error status block.
+ * @data: private data supplied by the driver at registration.
+ *
+ * Return: none.
+ */
+typedef void (*error_handle)(struct acpi_hest_generic_data *gdata, int sev,
+			     void *data);
+
+#ifdef CONFIG_ACPI_APEI_GHES
+/**
+ * ghes_error_notify_register - register an error handling function
+ * for the hw errors.
+ * @sec_type: section type GUID of the CPER records to be notified.
+ * @handle: pointer to the error handling function.
+ * @data: handler driver's private data, passed back to @handle.
+ *
+ * Return: 0 on success, -ENOMEM, or -ENODEV without CONFIG_ACPI_APEI_GHES.
+ */
+int ghes_error_notify_register(guid_t sec_type, error_handle handle,
+			       void *data);
+
+/**
+ * ghes_error_notify_unregister - unregister an error handling function
+ * for the hw errors.
+ * @sec_type: section type GUID the handler was registered with.
+ * @handle: pointer to the error handling function to remove.
+ *
+ * Return: none.
+ */
+void ghes_error_notify_unregister(guid_t sec_type, error_handle handle);
+
+#else
+/* Stubs for !CONFIG_ACPI_APEI_GHES; static inline avoids duplicate symbols. */
+static inline int ghes_error_notify_register(guid_t sec_type,
+					     error_handle handle, void *data)
+{
+	return -ENODEV;
+}
+static inline void
+ghes_error_notify_unregister(guid_t sec_type, error_handle handle) { }
+#endif
+
 int ghes_estatus_pool_init(int num_ghes);
 
 /* From drivers/edac/ghes_edac.c */
-- 
1.9.1



  reply index

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <Shiju Jose>
2019-06-17 14:28 ` [PATCH 0/6] rasdaemon:add logging of HiSilicon HIP08 non-standard H/W errors and changes in the error decoding code Shiju Jose
2019-06-17 14:28   ` [PATCH 1/6] rasdaemon:print non-standard error data if not decoded Shiju Jose
2019-06-17 14:28   ` [PATCH 2/6] rasdaemon: rearrange HiSilicon HIP07 decoding function table Shiju Jose
2019-06-17 14:28   ` [PATCH 3/6] rasdaemon: update iteration logic for the non-standard error decoding functions Shiju Jose
2019-06-17 14:28   ` [PATCH 4/6] rasdaemon:add logging HiSilicon HIP08 H/W errors reported in the OEM format1 Shiju Jose
2019-06-17 14:28   ` [PATCH 5/6] rasdaemon:add logging HiSilicon HIP08 H/W errors reported in the OEM format2 Shiju Jose
2019-06-17 14:28   ` [PATCH 6/6] rasdaemon:add logging HiSilicon HIP08 PCIe local errors Shiju Jose
2019-06-21 18:42   ` [PATCH 0/6] rasdaemon:add logging of HiSilicon HIP08 non-standard H/W errors and changes in the error decoding code Mauro Carvalho Chehab
2019-08-12 10:11 ` [PATCH RFC 0/4] ACPI: APEI: Add support to notify the vendor specific HW errors Shiju Jose
2019-08-12 10:11   ` Shiju Jose [this message]
2019-08-12 10:11   ` [PATCH RFC 2/4] ACPI: APEI: Add ghes_handle_memory_failure to the new notification method Shiju Jose
2019-08-12 10:11   ` [PATCH RFC 3/4] ACPI: APEI: Add ghes_handle_aer " Shiju Jose
2019-08-12 10:11   ` [PATCH RFC 4/4] ACPI: APEI: Add log_arm_hw_error " Shiju Jose

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190812101149.26036-2-shiju.jose@huawei.com \
    --to=shiju.jose@huawei.com \
    --cc=baicar@os.amperecomputing.com \
    --cc=bp@alien8.de \
    --cc=james.morse@arm.com \
    --cc=jonathan.cameron@huawei.com \
    --cc=lenb@kernel.org \
    --cc=linux-acpi@vger.kernel.org \
    --cc=linux-edac@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linuxarm@huawei.com \
    --cc=rjw@rjwysocki.net \
    --cc=tanxiaofei@huawei.com \
    --cc=tony.luck@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-EDAC Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-edac/0 linux-edac/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-edac linux-edac/ https://lore.kernel.org/linux-edac \
		linux-edac@vger.kernel.org linux-edac@archiver.kernel.org
	public-inbox-index linux-edac


Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-edac


AGPL code for this site: git clone https://public-inbox.org/ public-inbox