From: Shiju Jose <shiju.jose@huawei.com>
To: <linux-acpi@vger.kernel.org>, <linux-edac@vger.kernel.org>,
<linux-kernel@vger.kernel.org>, <rjw@rjwysocki.net>,
<lenb@kernel.org>, <james.morse@arm.com>, <tony.luck@intel.com>,
<bp@alien8.de>, <baicar@os.amperecomputing.com>
Cc: <linuxarm@huawei.com>, <jonathan.cameron@huawei.com>,
<tanxiaofei@huawei.com>, Shiju Jose <shiju.jose@huawei.com>
Subject: [PATCH RFC 1/4] ACPI: APEI: Add support to notify the vendor specific HW errors
Date: Mon, 12 Aug 2019 11:11:46 +0100 [thread overview]
Message-ID: <20190812101149.26036-2-shiju.jose@huawei.com> (raw)
In-Reply-To: <20190812101149.26036-1-shiju.jose@huawei.com>
Presently, vendor-specific HW errors reported in the non-standard format
are not passed on to the vendor drivers for recovery.
This patch adds support for notifying the registered kernel drivers of
vendor-specific HW errors.
Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
---
drivers/acpi/apei/ghes.c | 118 +++++++++++++++++++++++++++++++++++++++++++++--
include/acpi/ghes.h | 47 +++++++++++++++++++
2 files changed, 160 insertions(+), 5 deletions(-)
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index a66e00f..374d197 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -477,6 +477,77 @@ static void ghes_handle_aer(struct acpi_hest_generic_data *gdata)
#endif
}
+struct ghes_error_notify {
+ struct list_head list; /* entry in ghes_error_notify_list (RCU-traversed) */
+ struct rcu_head rcu_head; /* used by kfree_rcu() when ghes_remove() drops the entry */
+ guid_t sec_type; /* CPER section type GUID the handler is matched against */
+ error_handle handle; /* error handler function */
+ void *data; /* handler driver's private data, passed back to @handle */
+};
+
+/* Registered error handlers; list updates are serialised by the mutex */
+static DEFINE_MUTEX(ghes_error_notify_mutex);
+static LIST_HEAD(ghes_error_notify_list);
+static refcount_t ghes_ref_count; /* raised in ghes_probe(), dropped in ghes_remove() */
+
+/**
+ * ghes_error_notify_register - register a handler for vendor specific hw errors.
+ * @sec_type: sec_type of the corresponding CPER to be notified.
+ * @handle: pointer to the error handling function.
+ * @data: handler driver's private data, passed back to @handle.
+ *
+ * Return: 0 on success, -ENOMEM if the list entry cannot be allocated.
+ */
+int ghes_error_notify_register(guid_t sec_type, error_handle handle, void *data)
+{
+	struct ghes_error_notify *err_notify;
+
+	/* Allocate before locking so the -ENOMEM return cannot leak the mutex. */
+	err_notify = kzalloc(sizeof(*err_notify), GFP_KERNEL);
+	if (!err_notify)
+		return -ENOMEM;
+
+	err_notify->handle = handle;
+	guid_copy(&err_notify->sec_type, &sec_type);
+	err_notify->data = data;
+	mutex_lock(&ghes_error_notify_mutex);
+	list_add_rcu(&err_notify->list, &ghes_error_notify_list);
+	mutex_unlock(&ghes_error_notify_mutex);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ghes_error_notify_register);
+
+/**
+ * ghes_error_notify_unregister - unregister an error handling function.
+ * @sec_type: sec_type of the corresponding CPER.
+ * @handle: pointer to the error handling function.
+ *
+ * May sleep: waits for an RCU grace period before freeing the matched entry.
+ */
+void ghes_error_notify_unregister(guid_t sec_type, error_handle handle)
+{
+	struct ghes_error_notify *err_notify;
+	bool found = false;
+
+	/* Writers are serialised by the mutex; no rcu_read_lock() is needed. */
+	mutex_lock(&ghes_error_notify_mutex);
+	list_for_each_entry(err_notify, &ghes_error_notify_list, list) {
+		if (guid_equal(&err_notify->sec_type, &sec_type) &&
+		    err_notify->handle == handle) {
+			list_del_rcu(&err_notify->list);
+			found = true;
+			break;
+		}
+	}
+	mutex_unlock(&ghes_error_notify_mutex);
+	if (found) {
+		synchronize_rcu();
+		kfree(err_notify);
+	}
+}
+EXPORT_SYMBOL_GPL(ghes_error_notify_unregister);
+
static void ghes_do_proc(struct ghes *ghes,
const struct acpi_hest_generic_status *estatus)
{
@@ -485,6 +556,8 @@ static void ghes_do_proc(struct ghes *ghes,
guid_t *sec_type;
guid_t *fru_id = &NULL_UUID_LE;
char *fru_text = "";
+ bool is_notify = 0;
+ struct ghes_error_notify *err_notify;
sev = ghes_severity(estatus->error_severity);
apei_estatus_for_each_section(estatus, gdata) {
@@ -512,11 +585,29 @@ static void ghes_do_proc(struct ghes *ghes,
log_arm_hw_error(err);
} else {
- void *err = acpi_hest_get_payload(gdata);
-
- log_non_standard_event(sec_type, fru_id, fru_text,
- sec_sev, err,
- gdata->error_data_length);
+ rcu_read_lock();
+ list_for_each_entry_rcu(err_notify,
+ &ghes_error_notify_list, list) {
+ if (guid_equal(&err_notify->sec_type,
+ sec_type)) {
+ /* The notification is called in the
+ * interrupt context, thus the handler
+ * functions should be take care of it.
+ */
+ err_notify->handle(gdata, sev,
+ err_notify->data);
+ is_notify = 1;
+ }
+ }
+ rcu_read_unlock();
+
+ if (!is_notify) {
+ void *err = acpi_hest_get_payload(gdata);
+
+ log_non_standard_event(sec_type, fru_id,
+ fru_text, sec_sev, err,
+ gdata->error_data_length);
+ }
}
}
}
@@ -1217,6 +1308,11 @@ static int ghes_probe(struct platform_device *ghes_dev)
ghes_edac_register(ghes, &ghes_dev->dev);
+ if (!refcount_read(&ghes_ref_count))
+ refcount_set(&ghes_ref_count, 1);
+ else
+ refcount_inc(&ghes_ref_count);
+
/* Handle any pending errors right away */
spin_lock_irqsave(&ghes_notify_lock_irq, flags);
ghes_proc(ghes);
@@ -1237,6 +1333,7 @@ static int ghes_remove(struct platform_device *ghes_dev)
int rc;
struct ghes *ghes;
struct acpi_hest_generic *generic;
+ struct ghes_error_notify *err_notify, *tmp;
ghes = platform_get_drvdata(ghes_dev);
generic = ghes->generic;
@@ -1279,6 +1376,17 @@ static int ghes_remove(struct platform_device *ghes_dev)
ghes_fini(ghes);
+ if (refcount_dec_and_test(&ghes_ref_count) &&
+ !list_empty(&ghes_error_notify_list)) {
+ mutex_lock(&ghes_error_notify_mutex);
+ list_for_each_entry_safe(err_notify, tmp,
+ &ghes_error_notify_list, list) {
+ list_del_rcu(&err_notify->list);
+ kfree_rcu(err_notify, rcu_head);
+ }
+ mutex_unlock(&ghes_error_notify_mutex);
+ }
+
ghes_edac_unregister(ghes);
kfree(ghes);
diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h
index e3f1cdd..d480537 100644
--- a/include/acpi/ghes.h
+++ b/include/acpi/ghes.h
@@ -50,6 +50,53 @@ enum {
GHES_SEV_PANIC = 0x3,
};
+/**
+ * error_handle - error handling function for the hw errors.
+ * Called in interrupt context inside an RCU read-side section: must not sleep.
+ * @gdata: generic error data entry whose section type matched the handler.
+ * @sev: error severity of the entire error event defined in the
+ * ACPI spec table generic error status block.
+ * @data: handler driver's private data, as given at registration.
+ *
+ * Return: none.
+ */
+typedef void (*error_handle)(struct acpi_hest_generic_data *gdata, int sev,
+ void *data);
+
+#ifdef CONFIG_ACPI_APEI_GHES
+/**
+ * ghes_error_notify_register - register an error handling function
+ * for the hw errors.
+ * @sec_type: sec_type of the corresponding CPER to be notified.
+ * @handle: pointer to the error handling function.
+ * @data: handler driver's private data.
+ *
+ * Return: 0 on success, -ENOMEM if the handler entry cannot be allocated.
+ */
+int ghes_error_notify_register(guid_t sec_type, error_handle handle,
+ void *data);
+
+/**
+ * ghes_error_notify_unregister - unregister an error handling function
+ * for the hw errors.
+ * @sec_type: sec_type of the corresponding CPER.
+ * @handle: pointer to the error handling function.
+ *
+ * Return: none.
+ */
+void ghes_error_notify_unregister(guid_t sec_type, error_handle handle);
+
+#else /* !CONFIG_ACPI_APEI_GHES: static inline no-op stubs so callers still build */
+static inline int ghes_error_notify_register(guid_t sec_type, error_handle handle, void *data)
+{
+	return -ENODEV;
+}
+
+static inline void ghes_error_notify_unregister(guid_t sec_type, error_handle handle)
+{
+}
+#endif /* CONFIG_ACPI_APEI_GHES */
+
int ghes_estatus_pool_init(int num_ghes);
/* From drivers/edac/ghes_edac.c */
--
1.9.1
next prev parent reply other threads:[~2019-08-12 10:12 UTC|newest]
Thread overview: 72+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <Shiju Jose>
2019-08-12 10:11 ` [PATCH RFC 0/4] ACPI: APEI: Add support to notify the vendor specific HW errors Shiju Jose
2019-08-12 10:11 ` Shiju Jose [this message]
2019-08-21 17:23 ` [PATCH RFC 1/4] " James Morse
2019-08-22 16:57 ` Shiju Jose
2019-08-12 10:11 ` [PATCH RFC 2/4] ACPI: APEI: Add ghes_handle_memory_failure to the new notification method Shiju Jose
2019-08-21 17:22 ` James Morse
2019-08-22 16:57 ` Shiju Jose
2019-08-12 10:11 ` [PATCH RFC 3/4] ACPI: APEI: Add ghes_handle_aer " Shiju Jose
2019-08-12 10:11 ` [PATCH RFC 4/4] ACPI: APEI: Add log_arm_hw_error " Shiju Jose
2019-08-21 17:22 ` [PATCH RFC 0/4] ACPI: APEI: Add support to notify the vendor specific HW errors James Morse
2019-08-22 16:56 ` Shiju Jose
2019-10-03 17:21 ` James Morse
2020-01-15 11:01 ` [RFC PATCH 0/2] " Shiju Jose
2020-01-15 11:01 ` [RFC PATCH 1/2] " Shiju Jose
2020-01-15 11:01 ` [RFC PATCH 2/2] PCI:hip08:Add driver to handle HiSilicon hip08 PCIe controller's errors Shiju Jose
2020-01-15 14:13 ` Bjorn Helgaas
2020-01-17 9:40 ` Shiju Jose
2020-01-24 12:39 ` [PATCH v2 0/2] ACPI: APEI: Add support to notify the vendor specific HW errors Shiju Jose
2020-01-24 12:39 ` [PATCH v2 1/2] " Shiju Jose
2020-01-24 12:39 ` [PATCH v2 2/2] PCI: hip: Add handling of HiSilicon hip PCIe controller's errors Shiju Jose
2020-01-24 14:30 ` Bjorn Helgaas
2020-01-26 18:12 ` kbuild test robot
2020-01-26 18:12 ` [RFC PATCH] PCI: hip: hisi_pcie_sec_type can be static kbuild test robot
2020-02-03 16:51 ` [PATCH v3 0/2] ACPI: APEI: Add support to notify the vendor specific HW errors Shiju Jose
2020-02-03 16:51 ` [PATCH v3 1/2] " Shiju Jose
2020-02-03 16:51 ` [PATCH v3 2/2] PCI: HIP: Add handling of HiSilicon HIP PCIe controller's errors Shiju Jose
2020-02-04 14:31 ` Dan Carpenter
2020-02-07 10:31 ` [PATCH v4 0/2] ACPI: APEI: Add support to notify the vendor specific HW errors Shiju Jose
2020-02-07 10:31 ` [PATCH v4 1/2] " Shiju Jose
2020-03-11 17:29 ` James Morse
2020-03-12 12:10 ` Shiju Jose
2020-03-13 15:17 ` James Morse
2020-03-13 17:08 ` Shiju Jose
2020-02-07 10:31 ` [PATCH v4 2/2] PCI: HIP: Add handling of HiSilicon HIP PCIe controller errors Shiju Jose
2020-03-09 9:23 ` [PATCH v4 0/2] ACPI: APEI: Add support to notify the vendor specific HW errors Shiju Jose
2020-03-11 17:27 ` James Morse
2020-03-25 16:42 ` [PATCH v6 0/2] ACPI / " Shiju Jose
2020-03-25 16:42 ` [PATCH v6 1/2] " Shiju Jose
2020-03-27 18:22 ` Borislav Petkov
2020-03-30 10:14 ` Shiju Jose
2020-03-30 10:33 ` Borislav Petkov
2020-03-30 11:55 ` Shiju Jose
2020-03-30 13:42 ` Borislav Petkov
2020-03-30 15:44 ` Shiju Jose
2020-03-31 9:09 ` Borislav Petkov
2020-04-08 9:20 ` Shiju Jose
2020-04-08 10:03 ` James Morse
2020-04-21 13:18 ` Shiju Jose
2020-05-11 11:20 ` Shiju Jose
2020-03-25 16:42 ` [PATCH v6 2/2] PCI: hip: Add handling of HiSilicon HIP PCIe controller errors Shiju Jose
2020-03-27 15:07 ` [PATCH v6 0/2] ACPI / APEI: Add support to notify the vendor specific HW errors Bjorn Helgaas
2020-04-07 12:00 ` [v7 PATCH 0/6] ACPI / APEI: Add support to notify non-fatal " Shiju Jose
2020-04-07 12:00 ` [v7 PATCH 1/6] ACPI / APEI: Add support to queuing up the non-fatal HW errors and notify Shiju Jose
2020-04-08 19:41 ` kbuild test robot
2020-04-08 19:41 ` [RFC PATCH] ACPI / APEI: ghes_gdata_pool_init() can be static kbuild test robot
2020-04-07 12:00 ` [v7 PATCH 2/6] ACPI / APEI: Add callback for memory errors to the GHES notifier Shiju Jose
2020-04-07 12:00 ` [v7 PATCH 3/6] ACPI / APEI: Add callback for AER " Shiju Jose
2020-04-07 12:00 ` [v7 PATCH 4/6] ACPI / APEI: Add callback for ARM HW errors " Shiju Jose
2020-04-07 12:00 ` [v7 PATCH 5/6] ACPI / APEI: Add callback for non-standard " Shiju Jose
2020-04-07 12:00 ` [v7 PATCH 6/6] PCI: hip: Add handling of HiSilicon HIP PCIe controller errors Shiju Jose
2020-04-07 22:03 ` kbuild test robot
2020-04-21 13:21 ` [RESEND PATCH v7 0/6] ACPI / APEI: Add support to notify non-fatal HW errors Shiju Jose
2020-04-21 13:21 ` [RESEND PATCH v7 1/6] ACPI / APEI: Add support to queuing up the non-fatal HW errors and notify Shiju Jose
2020-04-21 14:12 ` Dan Carpenter
2020-04-21 13:21 ` [RESEND PATCH v7 2/6] ACPI / APEI: Add callback for memory errors to the GHES notifier Shiju Jose
2020-04-21 13:21 ` [RESEND PATCH v7 3/6] ACPI / APEI: Add callback for AER " Shiju Jose
2020-04-21 13:21 ` [RESEND PATCH v7 4/6] ACPI / APEI: Add callback for ARM HW errors " Shiju Jose
2020-04-21 14:14 ` Dan Carpenter
2020-04-21 15:18 ` Shiju Jose
2020-04-21 13:21 ` [RESEND PATCH v7 5/6] ACPI / APEI: Add callback for non-standard " Shiju Jose
2020-04-21 13:21 ` [RESEND PATCH v7 6/6] PCI: hip: Add handling of HiSilicon HIP PCIe controller errors Shiju Jose
2020-04-21 14:20 ` Dan Carpenter
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190812101149.26036-2-shiju.jose@huawei.com \
--to=shiju.jose@huawei.com \
--cc=baicar@os.amperecomputing.com \
--cc=bp@alien8.de \
--cc=james.morse@arm.com \
--cc=jonathan.cameron@huawei.com \
--cc=lenb@kernel.org \
--cc=linux-acpi@vger.kernel.org \
--cc=linux-edac@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linuxarm@huawei.com \
--cc=rjw@rjwysocki.net \
--cc=tanxiaofei@huawei.com \
--cc=tony.luck@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).