From: Shiju Jose <shiju.jose@huawei.com>
To: <linux-acpi@vger.kernel.org>, <linux-edac@vger.kernel.org>,
<linux-kernel@vger.kernel.org>, <rjw@rjwysocki.net>,
<lenb@kernel.org>, <james.morse@arm.com>, <tony.luck@intel.com>,
<bp@alien8.de>, <baicar@os.amperecomputing.com>
Cc: <linuxarm@huawei.com>, <jonathan.cameron@huawei.com>,
<tanxiaofei@huawei.com>, Shiju Jose <shiju.jose@huawei.com>
Subject: [PATCH RFC 1/4] ACPI: APEI: Add support to notify the vendor specific HW errors
Date: Mon, 12 Aug 2019 11:11:46 +0100 [thread overview]
Message-ID: <20190812101149.26036-2-shiju.jose@huawei.com> (raw)
In-Reply-To: <20190812101149.26036-1-shiju.jose@huawei.com>
Presently, vendor-specific HW errors reported in the non-standard format
are not passed on to the vendor drivers for recovery.
This patch adds support for notifying the registered kernel drivers of
vendor-specific HW errors.
Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
---
drivers/acpi/apei/ghes.c | 118 +++++++++++++++++++++++++++++++++++++++++++++--
include/acpi/ghes.h | 47 +++++++++++++++++++
2 files changed, 160 insertions(+), 5 deletions(-)
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index a66e00f..374d197 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -477,6 +477,77 @@ static void ghes_handle_aer(struct acpi_hest_generic_data *gdata)
#endif
}
+struct ghes_error_notify {
+ struct list_head list; /* node in ghes_error_notify_list */
+ struct rcu_head rcu_head; /* for kfree_rcu() in ghes_remove() */
+ guid_t sec_type; /* guid of the error record this handler is matched against */
+ error_handle handle; /* error handler function, called from ghes_do_proc() */
+ void *data; /* handler driver's private data if any, passed back to handle */
+};
+
+/*
+ * List to store the registered error handling functions.
+ * ghes_error_notify_mutex is held around every list update;
+ * ghes_do_proc() walks the list under rcu_read_lock().
+ */
+static DEFINE_MUTEX(ghes_error_notify_mutex);
+static LIST_HEAD(ghes_error_notify_list);
+static refcount_t ghes_ref_count; /* set/incremented in ghes_probe(), dropped in ghes_remove() */
+
+/**
+ * ghes_error_notify_register - register an error handling function
+ * for the hw errors.
+ * @sec_type: sec_type of the corresponding CPER to be notified.
+ * @handle: pointer to the error handling function.
+ * @data: handler driver's private data.
+ *
+ * The new entry is allocated and filled in before ghes_error_notify_mutex
+ * is taken, so the allocation-failure path never returns with the mutex
+ * held and the entry is fully initialised before it is published with
+ * list_add_rcu().
+ *
+ * return 0 : SUCCESS, -EINVAL : @handle is NULL, -ENOMEM : out of memory
+ */
+int ghes_error_notify_register(guid_t sec_type, error_handle handle, void *data)
+{
+	struct ghes_error_notify *err_notify;
+
+	/* ghes_do_proc() calls ->handle unconditionally; never store NULL. */
+	if (!handle)
+		return -EINVAL;
+
+	err_notify = kzalloc(sizeof(*err_notify), GFP_KERNEL);
+	if (!err_notify)
+		return -ENOMEM;
+
+	err_notify->handle = handle;
+	guid_copy(&err_notify->sec_type, &sec_type);
+	err_notify->data = data;
+
+	/* The mutex only needs to cover the list update itself. */
+	mutex_lock(&ghes_error_notify_mutex);
+	list_add_rcu(&err_notify->list, &ghes_error_notify_list);
+	mutex_unlock(&ghes_error_notify_mutex);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ghes_error_notify_register);
+
+/**
+ * ghes_error_notify_unregister - unregister an error handling function.
+ * @sec_type: sec_type of the corresponding CPER.
+ * @handle: pointer to the error handling function.
+ *
+ * Removes the first list entry matching both @sec_type and @handle, if
+ * any. The list is walked with the plain iterator because
+ * ghes_error_notify_mutex already excludes every other updater. The mutex
+ * is dropped before synchronize_rcu(), so a grace period is only waited
+ * for when an entry was actually unlinked and never with the mutex held.
+ * The entry is freed after the grace period, once readers walking the
+ * list under rcu_read_lock() can no longer be using it.
+ *
+ * return none.
+ */
+void ghes_error_notify_unregister(guid_t sec_type, error_handle handle)
+{
+	struct ghes_error_notify *err_notify;
+	struct ghes_error_notify *victim = NULL;
+
+	mutex_lock(&ghes_error_notify_mutex);
+	list_for_each_entry(err_notify, &ghes_error_notify_list, list) {
+		if (guid_equal(&err_notify->sec_type, &sec_type) &&
+		    err_notify->handle == handle) {
+			list_del_rcu(&err_notify->list);
+			victim = err_notify;
+			break;
+		}
+	}
+	mutex_unlock(&ghes_error_notify_mutex);
+
+	if (!victim)
+		return;
+
+	/* Wait for readers in ghes_do_proc() before freeing the entry. */
+	synchronize_rcu();
+	kfree(victim);
+}
+EXPORT_SYMBOL_GPL(ghes_error_notify_unregister);
+
static void ghes_do_proc(struct ghes *ghes,
const struct acpi_hest_generic_status *estatus)
{
@@ -485,6 +556,8 @@ static void ghes_do_proc(struct ghes *ghes,
guid_t *sec_type;
guid_t *fru_id = &NULL_UUID_LE;
char *fru_text = "";
+ bool is_notify = 0;
+ struct ghes_error_notify *err_notify;
sev = ghes_severity(estatus->error_severity);
apei_estatus_for_each_section(estatus, gdata) {
@@ -512,11 +585,29 @@ static void ghes_do_proc(struct ghes *ghes,
log_arm_hw_error(err);
} else {
- void *err = acpi_hest_get_payload(gdata);
-
- log_non_standard_event(sec_type, fru_id, fru_text,
- sec_sev, err,
- gdata->error_data_length);
+ rcu_read_lock();
+ list_for_each_entry_rcu(err_notify,
+ &ghes_error_notify_list, list) {
+ if (guid_equal(&err_notify->sec_type,
+ sec_type)) {
+ /* The notification is called in the
+ * interrupt context, thus the handler
+ * functions must take this into account.
+ */
+ err_notify->handle(gdata, sev,
+ err_notify->data);
+ is_notify = 1;
+ }
+ }
+ rcu_read_unlock();
+
+ if (!is_notify) {
+ void *err = acpi_hest_get_payload(gdata);
+
+ log_non_standard_event(sec_type, fru_id,
+ fru_text, sec_sev, err,
+ gdata->error_data_length);
+ }
}
}
}
@@ -1217,6 +1308,11 @@ static int ghes_probe(struct platform_device *ghes_dev)
ghes_edac_register(ghes, &ghes_dev->dev);
+ if (!refcount_read(&ghes_ref_count))
+ refcount_set(&ghes_ref_count, 1);
+ else
+ refcount_inc(&ghes_ref_count);
+
/* Handle any pending errors right away */
spin_lock_irqsave(&ghes_notify_lock_irq, flags);
ghes_proc(ghes);
@@ -1237,6 +1333,7 @@ static int ghes_remove(struct platform_device *ghes_dev)
int rc;
struct ghes *ghes;
struct acpi_hest_generic *generic;
+ struct ghes_error_notify *err_notify, *tmp;
ghes = platform_get_drvdata(ghes_dev);
generic = ghes->generic;
@@ -1279,6 +1376,17 @@ static int ghes_remove(struct platform_device *ghes_dev)
ghes_fini(ghes);
+ if (refcount_dec_and_test(&ghes_ref_count) &&
+ !list_empty(&ghes_error_notify_list)) {
+ mutex_lock(&ghes_error_notify_mutex);
+ list_for_each_entry_safe(err_notify, tmp,
+ &ghes_error_notify_list, list) {
+ list_del_rcu(&err_notify->list);
+ kfree_rcu(err_notify, rcu_head);
+ }
+ mutex_unlock(&ghes_error_notify_mutex);
+ }
+
ghes_edac_unregister(ghes);
kfree(ghes);
diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h
index e3f1cdd..d480537 100644
--- a/include/acpi/ghes.h
+++ b/include/acpi/ghes.h
@@ -50,6 +50,53 @@ enum {
GHES_SEV_PANIC = 0x3,
};
+/**
+ * error_handle - error handling function for the hw errors.
+ * This handle function is called in the interrupt context.
+ * ghes_do_proc() invokes it between rcu_read_lock() and rcu_read_unlock()
+ * for every error section whose type equals the registered sec_type.
+ * @gdata: acpi_hest_generic_data.
+ * @sev: error severity of the entire error event defined in the
+ * ACPI spec table generic error status block.
+ * @data: handler driver's private data, as passed to
+ * ghes_error_notify_register().
+ *
+ * return : none.
+ */
+typedef void (*error_handle)(struct acpi_hest_generic_data *gdata, int sev,
+ void *data);
+
+#ifdef CONFIG_ACPI_APEI_GHES
+/**
+ * ghes_error_notify_register - register an error handling function
+ * for the hw errors.
+ * @sec_type: sec_type of the corresponding CPER to be notified.
+ * @handle: pointer to the error handling function.
+ * @data: handler driver's private data.
+ *
+ * return : 0 - SUCCESS, non-zero - FAIL.
+ */
+int ghes_error_notify_register(guid_t sec_type, error_handle handle,
+			       void *data);
+
+/**
+ * ghes_error_notify_unregister - unregister an error handling function
+ * for the hw errors.
+ * @sec_type: sec_type of the corresponding CPER.
+ * @handle: pointer to the error handling function.
+ *
+ * return none.
+ */
+void ghes_error_notify_unregister(guid_t sec_type, error_handle handle);
+
+#else
+/*
+ * Stubs used when CONFIG_ACPI_APEI_GHES is not set. They are static inline
+ * so that every translation unit including this header gets its own
+ * private copy; a plain definition here would be emitted as an external
+ * symbol by each includer and clash at link time.
+ */
+static inline int ghes_error_notify_register(guid_t sec_type,
+					     error_handle handle, void *data)
+{
+	return -ENODEV;
+}
+
+static inline void ghes_error_notify_unregister(guid_t sec_type,
+						error_handle handle)
+{
+}
+#endif
+
int ghes_estatus_pool_init(int num_ghes);
/* From drivers/edac/ghes_edac.c */
--
1.9.1
next prev parent reply other threads:[~2019-08-12 10:12 UTC|newest]
Thread overview: 33+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <Shiju Jose>
2019-06-17 14:28 ` [PATCH 0/6] rasdaemon:add logging of HiSilicon HIP08 non-standard H/W errors and changes in the error decoding code Shiju Jose
2019-06-17 14:28 ` [PATCH 1/6] rasdaemon:print non-standard error data if not decoded Shiju Jose
2019-06-17 14:28 ` [PATCH 2/6] rasdaemon: rearrange HiSilicon HIP07 decoding function table Shiju Jose
2019-06-17 14:28 ` [PATCH 3/6] rasdaemon: update iteration logic for the non-standard error decoding functions Shiju Jose
2019-06-17 14:28 ` [PATCH 4/6] rasdaemon:add logging HiSilicon HIP08 H/W errors reported in the OEM format1 Shiju Jose
2019-06-17 14:28 ` [PATCH 5/6] rasdaemon:add logging HiSilicon HIP08 H/W errors reported in the OEM format2 Shiju Jose
2019-06-17 14:28 ` [PATCH 6/6] rasdaemon:add logging HiSilicon HIP08 PCIe local errors Shiju Jose
2019-06-21 18:42 ` [PATCH 0/6] rasdaemon:add logging of HiSilicon HIP08 non-standard H/W errors and changes in the error decoding code Mauro Carvalho Chehab
2019-08-12 10:11 ` [PATCH RFC 0/4] ACPI: APEI: Add support to notify the vendor specific HW errors Shiju Jose
2019-08-12 10:11 ` Shiju Jose [this message]
2019-08-21 17:23 ` [PATCH RFC 1/4] " James Morse
2019-08-22 16:57 ` Shiju Jose
2019-08-12 10:11 ` [PATCH RFC 2/4] ACPI: APEI: Add ghes_handle_memory_failure to the new notification method Shiju Jose
2019-08-21 17:22 ` James Morse
2019-08-22 16:57 ` Shiju Jose
2019-08-12 10:11 ` [PATCH RFC 3/4] ACPI: APEI: Add ghes_handle_aer " Shiju Jose
2019-08-12 10:11 ` [PATCH RFC 4/4] ACPI: APEI: Add log_arm_hw_error " Shiju Jose
2019-08-21 17:22 ` [PATCH RFC 0/4] ACPI: APEI: Add support to notify the vendor specific HW errors James Morse
2019-08-22 16:56 ` Shiju Jose
2019-10-03 17:21 ` James Morse
2019-10-16 16:33 ` [PATCH 0/7] rasdaemon: add fixes, database closure and signal handling Shiju Jose
2019-10-16 16:33 ` [PATCH 1/7] rasdaemon: fix cleanup issues in ras-events.c:read_ras_event_all_cpus() Shiju Jose
2019-10-16 16:33 ` [PATCH 2/7] rasdaemon: fix memory leak in ras-events.c:handle_ras_events() Shiju Jose
2019-10-16 16:33 ` [PATCH 3/7] rasdaemon: fix missing fclose in ras-events.c:select_tracing_timestamp() Shiju Jose
2019-10-16 16:33 ` [PATCH 4/7] rasdaemon: fix memory leak in ras-events.c:add_event_handler() Shiju Jose
2019-10-16 16:33 ` [PATCH 5/7] rasdaemon: delete multiple definitions of ARRAY_SIZE Shiju Jose
2019-10-16 16:34 ` [PATCH 6/7] rasdaemon: add closure and cleanups for the database Shiju Jose
2019-10-16 16:34 ` [PATCH 7/7] rasdaemon: add signal handling for the cleanup Shiju Jose
2019-11-13 16:38 ` [PATCH 0/7] rasdaemon: add fixes, database closure and signal handling Shiju Jose
2019-11-20 4:37 ` Mauro Carvalho Chehab
2019-11-13 16:31 ` [PATCH rasdaemon 0/2] rasdaemon: add fix for the sql table Shiju Jose
2019-11-13 16:31 ` [PATCH rasdaemon 1/2] rasdaemon: fix for the ras-record.c:ras_mc_prepare_stmt() failure when new fields added to " Shiju Jose
2019-11-13 16:31 ` [PATCH rasdaemon 2/2] rasdaemon: store PCIe dev name and TLP header for the aer event Shiju Jose
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190812101149.26036-2-shiju.jose@huawei.com \
--to=shiju.jose@huawei.com \
--cc=baicar@os.amperecomputing.com \
--cc=bp@alien8.de \
--cc=james.morse@arm.com \
--cc=jonathan.cameron@huawei.com \
--cc=lenb@kernel.org \
--cc=linux-acpi@vger.kernel.org \
--cc=linux-edac@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linuxarm@huawei.com \
--cc=rjw@rjwysocki.net \
--cc=tanxiaofei@huawei.com \
--cc=tony.luck@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).