All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH RESEND v8 1/2] ACPI / APEI: Add support to notify the vendor specific HW errors
@ 2020-05-29 20:04 Shiju Jose
  2020-06-01  0:59   ` kbuild test robot
  0 siblings, 1 reply; 3+ messages in thread
From: Shiju Jose @ 2020-05-29 20:04 UTC (permalink / raw)
  To: linux-acpi, linux-pci, linux-kernel, rjw, bp, james.morse,
	helgaas, lenb, tony.luck, dan.carpenter, gregkh, zhangliguang,
	tglx
  Cc: Shiju Jose, linuxarm, yangyicong

Add support for reporting vendor-specific non-fatal HW errors
to the drivers for error recovery.

Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
---
 drivers/acpi/apei/ghes.c | 130 ++++++++++++++++++++++++++++++++++++++-
 include/acpi/ghes.h      |  28 +++++++++
 2 files changed, 157 insertions(+), 1 deletion(-)

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 24c9642e8fc7..2d10709b2eb5 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -33,6 +33,7 @@
 #include <linux/irq_work.h>
 #include <linux/llist.h>
 #include <linux/genalloc.h>
+#include <linux/kfifo.h>
 #include <linux/pci.h>
 #include <linux/pfn.h>
 #include <linux/aer.h>
@@ -63,6 +64,11 @@
 #define GHES_ESTATUS_CACHES_SIZE	4
 
 #define GHES_ESTATUS_IN_CACHE_MAX_NSEC	10000000000ULL
+
+#define GHES_EVENT_RING_SIZE	256	/* number of entries in ghes_event_ring */
+#define GHES_GDATA_POOL_MIN_ALLOC_ORDER	3	/* min_alloc_order given to gen_pool_create() */
+#define GHES_GDATA_POOL_MIN_SIZE	65536	/* lower bound for the ghes_gdata_pool size */
+
 /* Prevent too many caches are allocated because of RCU */
 #define GHES_ESTATUS_CACHE_ALLOCED_MAX	(GHES_ESTATUS_CACHES_SIZE * 3 / 2)
 
@@ -122,6 +128,19 @@ static DEFINE_MUTEX(ghes_list_mutex);
  */
 static DEFINE_SPINLOCK(ghes_notify_lock_irq);
 
+struct ghes_event_entry {	/* one queued vendor-specific error record */
+	struct acpi_hest_generic_data *gdata;	/* copy allocated from ghes_gdata_pool */
+	int error_severity;	/* handed to the notifier chain as its value */
+};
+
+static DEFINE_KFIFO(ghes_event_ring, struct ghes_event_entry,
+		    GHES_EVENT_RING_SIZE);	/* drained by ghes_event_work_func() */
+
+static DEFINE_SPINLOCK(ghes_event_ring_lock);	/* taken when adding to ghes_event_ring */
+
+static struct gen_pool *ghes_gdata_pool;	/* backs the queued record copies */
+static unsigned long ghes_gdata_pool_size_request;	/* grown in ghes_new() */
+
 static struct gen_pool *ghes_estatus_pool;
 static unsigned long ghes_estatus_pool_size_request;
 
@@ -188,6 +207,58 @@ int ghes_estatus_pool_init(int num_ghes)
 	return -ENOMEM;
 }
 
+/**
+ * ghes_gdata_pool_init - create the pool holding queued vendor error records.
+ *
+ * Creates ghes_gdata_pool and backs it with vmalloc() memory. The size is
+ * ghes_gdata_pool_size_request, raised to GHES_GDATA_POOL_MIN_SIZE if smaller,
+ * and rounded up to a whole number of pages.
+ *
+ * Return: 0 on success, -ENOMEM if the pool or its memory cannot be allocated,
+ * or the error returned by gen_pool_add().
+ */
+static int ghes_gdata_pool_init(void)
+{
+	unsigned long addr, len;
+	int rc;
+
+	ghes_gdata_pool = gen_pool_create(GHES_GDATA_POOL_MIN_ALLOC_ORDER, -1);
+	if (!ghes_gdata_pool)
+		return -ENOMEM;
+
+	if (ghes_gdata_pool_size_request < GHES_GDATA_POOL_MIN_SIZE)
+		ghes_gdata_pool_size_request = GHES_GDATA_POOL_MIN_SIZE;
+
+	len = PAGE_ALIGN(ghes_gdata_pool_size_request);
+	addr = (unsigned long)vmalloc(len);
+	if (!addr) {
+		rc = -ENOMEM;
+		goto err_pool_alloc;
+	}
+
+	/*
+	 * NOTE(review): presumably required so the region can be accessed
+	 * from contexts that must not fault on vmalloc mappings - confirm.
+	 */
+	vmalloc_sync_mappings();
+
+	rc = gen_pool_add(ghes_gdata_pool, addr, len, -1);
+	if (rc)
+		goto err_pool_add;
+
+	return 0;
+
+err_pool_add:
+	vfree((void *)addr);
+
+err_pool_alloc:
+	gen_pool_destroy(ghes_gdata_pool);
+	/* Do not leave a dangling pointer behind for later pool users. */
+	ghes_gdata_pool = NULL;
+
+	return rc;
+}
+
 static int map_gen_v2(struct ghes *ghes)
 {
 	return apei_map_generic_address(&ghes->generic_v2->read_ack_register);
@@ -247,6 +300,10 @@ static struct ghes *ghes_new(struct acpi_hest_generic *generic)
 		goto err_unmap_status_addr;
 	}
 
+	ghes_gdata_pool_size_request += generic->records_to_preallocate *
+					generic->max_sections_per_record *
+					generic->max_raw_data_length;
+
 	return ghes;
 
 err_unmap_status_addr:
@@ -490,6 +547,99 @@ static void ghes_handle_aer(struct acpi_hest_generic_data *gdata)
 #endif
 }
 
+/* Notifier chain invoked for each queued vendor-specific error record. */
+static BLOCKING_NOTIFIER_HEAD(ghes_event_notify_list);
+
+/**
+ * ghes_register_event_notifier - register an event notifier
+ * for the non-fatal HW errors.
+ * @nb: pointer to the notifier_block structure of the event handler.
+ *
+ * Return: 0 on success, non-zero on failure.
+ */
+int ghes_register_event_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_register(&ghes_event_notify_list, nb);
+}
+EXPORT_SYMBOL_GPL(ghes_register_event_notifier);
+
+/**
+ * ghes_unregister_event_notifier - unregister the previously
+ * registered event notifier.
+ * @nb: pointer to the notifier_block structure of the event handler.
+ */
+void ghes_unregister_event_notifier(struct notifier_block *nb)
+{
+	blocking_notifier_chain_unregister(&ghes_event_notify_list, nb);
+}
+EXPORT_SYMBOL_GPL(ghes_unregister_event_notifier);
+
+/*
+ * Drain ghes_event_ring: pass each queued record to the notifier chain, with
+ * its severity as the notifier value, then give the record's memory back to
+ * ghes_gdata_pool.
+ */
+static void ghes_event_work_func(struct work_struct *work)
+{
+	struct ghes_event_entry entry;
+	u32 len;
+
+	while (kfifo_get(&ghes_event_ring, &entry)) {
+		/*
+		 * Take the size before the notifiers see the record, so the
+		 * amount freed matches the amount allocated even if a
+		 * notifier modifies the record.
+		 */
+		len = acpi_hest_get_record_size(entry.gdata);
+		blocking_notifier_call_chain(&ghes_event_notify_list,
+					     entry.error_severity,
+					     entry.gdata);
+		gen_pool_free(ghes_gdata_pool, (unsigned long)entry.gdata, len);
+	}
+}
+
+static DECLARE_WORK(ghes_event_work, ghes_event_work_func);
+
+/*
+ * Queue a copy of a vendor-specific error record for ghes_event_work, which
+ * delivers it to the registered notifiers.
+ * @gdata: record to copy.
+ * @sev: severity passed on to the notifiers.
+ *
+ * The record is dropped when no pool memory is available or the ring is full.
+ */
+static void ghes_handle_non_standard_event(struct acpi_hest_generic_data *gdata,
+					   int sev)
+{
+	struct ghes_event_entry event_entry;
+	u32 len;
+
+	/*
+	 * ghes_init() creates the pool only after registering the platform
+	 * driver, so presumably a record can arrive before the pool exists.
+	 */
+	if (!ghes_gdata_pool)
+		return;
+
+	len = acpi_hest_get_record_size(gdata);
+	event_entry.gdata = (void *)gen_pool_alloc(ghes_gdata_pool, len);
+	if (!event_entry.gdata)
+		return;
+
+	memcpy(event_entry.gdata, gdata, len);
+	event_entry.error_severity = sev;
+
+	if (kfifo_in_spinlocked(&ghes_event_ring, &event_entry, 1,
+				&ghes_event_ring_lock)) {
+		schedule_work(&ghes_event_work);
+		return;
+	}
+
+	/* Ring full: nothing will consume the copy, so release it here. */
+	gen_pool_free(ghes_gdata_pool, (unsigned long)event_entry.gdata, len);
+	pr_warn(GHES_PFX "ghes event queue full\n");
+}
+
 static void ghes_do_proc(struct ghes *ghes,
 			 const struct acpi_hest_generic_status *estatus)
 {
@@ -527,6 +646,7 @@ static void ghes_do_proc(struct ghes *ghes,
 		} else {
 			void *err = acpi_hest_get_payload(gdata);
 
+			ghes_handle_non_standard_event(gdata, sev);
 			log_non_standard_event(sec_type, fru_id, fru_text,
 					       sec_sev, err,
 					       gdata->error_data_length);
@@ -1334,7 +1454,7 @@ static int __init ghes_init(void)
 
 	rc = platform_driver_register(&ghes_platform_driver);
 	if (rc)
-		goto err;
+		goto exit;
 
 	rc = apei_osc_setup();
 	if (rc == 0 && osc_sb_apei_support_acked)
@@ -1346,8 +1466,16 @@ static int __init ghes_init(void)
 	else
 		pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");
 
+	rc = ghes_gdata_pool_init();
+	if (rc) {
+		pr_warn(GHES_PFX "ghes_gdata_pool_init failed\n");
+		goto err;
+	}
+
 	return 0;
 err:
+	platform_driver_unregister(&ghes_platform_driver);
+exit:
 	return rc;
 }
 device_initcall(ghes_init);
diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h
index e3f1cddb4ac8..a3dd82069069 100644
--- a/include/acpi/ghes.h
+++ b/include/acpi/ghes.h
@@ -50,6 +50,34 @@ enum {
 	GHES_SEV_PANIC = 0x3,
 };
 
+
+#ifdef CONFIG_ACPI_APEI_GHES
+/**
+ * ghes_register_event_notifier - register an event notifier
+ * for the non-fatal HW errors.
+ * @nb: pointer to the notifier_block structure of the event notifier.
+ *
+ * Return : 0 - SUCCESS, non-zero - FAIL.
+ */
+int ghes_register_event_notifier(struct notifier_block *nb);
+
+/**
+ * ghes_unregister_event_notifier - unregister the previously
+ * registered event notifier.
+ * @nb: pointer to the notifier_block structure of the event notifier.
+ */
+void ghes_unregister_event_notifier(struct notifier_block *nb);
+#else /* !CONFIG_ACPI_APEI_GHES: stubs, registration always returns -ENODEV */
+static inline int ghes_register_event_notifier(struct notifier_block *nb)
+{
+	return -ENODEV;
+}
+
+static inline void ghes_unregister_event_notifier(struct notifier_block *nb)
+{
+}
+#endif /* CONFIG_ACPI_APEI_GHES */
+
 int ghes_estatus_pool_init(int num_ghes);
 
 /* From drivers/edac/ghes_edac.c */
-- 
2.17.1



^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH RESEND v8 1/2] ACPI / APEI: Add support to notify the vendor specific HW errors
  2020-05-29 20:04 [PATCH RESEND v8 1/2] ACPI / APEI: Add support to notify the vendor specific HW errors Shiju Jose
@ 2020-06-01  0:59   ` kbuild test robot
  0 siblings, 0 replies; 3+ messages in thread
From: kbuild test robot @ 2020-06-01  0:59 UTC (permalink / raw)
  To: Shiju Jose, linux-acpi, linux-pci, linux-kernel, rjw, bp,
	james.morse, helgaas, lenb, tony.luck, dan.carpenter
  Cc: kbuild-all, clang-built-linux

[-- Attachment #1: Type: text/plain, Size: 2858 bytes --]

Hi Shiju,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on pci/next]
[also build test WARNING on linus/master v5.7-rc7]
[cannot apply to pm/linux-next next-20200529]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest using the '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url:    https://github.com/0day-ci/linux/commits/Shiju-Jose/ACPI-APEI-Add-support-to-notify-the-vendor-specific-HW-errors/20200601-003936
base:   https://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git next
config: x86_64-allyesconfig (attached as .config)
compiler: clang version 11.0.0 (https://github.com/llvm/llvm-project 2388a096e7865c043e83ece4e26654bd3d1a20d5)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install x86_64 cross compiling tool for clang build
        # apt-get install binutils-x86-64-linux-gnu
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=x86_64 

If you fix the issue, kindly add the following tag as appropriate
Reported-by: kbuild test robot <lkp@intel.com>

All warnings (new ones prefixed by >>, old ones prefixed by <<):

>> drivers/acpi/apei/ghes.c:210:5: warning: no previous prototype for function 'ghes_gdata_pool_init' [-Wmissing-prototypes]
int ghes_gdata_pool_init(void)
^
drivers/acpi/apei/ghes.c:210:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
int ghes_gdata_pool_init(void)
^
static
1 warning generated.

vim +/ghes_gdata_pool_init +210 drivers/acpi/apei/ghes.c

   209	
 > 210	int ghes_gdata_pool_init(void)
   211	{
   212		unsigned long addr, len;
   213		int rc;
   214	
   215		ghes_gdata_pool = gen_pool_create(GHES_GDATA_POOL_MIN_ALLOC_ORDER, -1);
   216		if (!ghes_gdata_pool)
   217			return -ENOMEM;
   218	
   219		if (ghes_gdata_pool_size_request < GHES_GDATA_POOL_MIN_SIZE)
   220			ghes_gdata_pool_size_request = GHES_GDATA_POOL_MIN_SIZE;
   221	
   222		len = ghes_gdata_pool_size_request;
   223		addr = (unsigned long)vmalloc(PAGE_ALIGN(len));
   224		if (!addr)
   225			goto err_pool_alloc;
   226	
   227		vmalloc_sync_mappings();
   228	
   229		rc = gen_pool_add(ghes_gdata_pool, addr, PAGE_ALIGN(len), -1);
   230		if (rc)
   231			goto err_pool_add;
   232	
   233		return 0;
   234	
   235	err_pool_add:
   236		vfree((void *)addr);
   237	
   238	err_pool_alloc:
   239		gen_pool_destroy(ghes_gdata_pool);
   240	
   241		return -ENOMEM;
   242	}
   243	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 73483 bytes --]

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH RESEND v8 1/2] ACPI / APEI: Add support to notify the vendor specific HW errors
@ 2020-06-01  0:59   ` kbuild test robot
  0 siblings, 0 replies; 3+ messages in thread
From: kbuild test robot @ 2020-06-01  0:59 UTC (permalink / raw)
  To: kbuild-all

[-- Attachment #1: Type: text/plain, Size: 2936 bytes --]

Hi Shiju,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on pci/next]
[also build test WARNING on linus/master v5.7-rc7]
[cannot apply to pm/linux-next next-20200529]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest using the '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url:    https://github.com/0day-ci/linux/commits/Shiju-Jose/ACPI-APEI-Add-support-to-notify-the-vendor-specific-HW-errors/20200601-003936
base:   https://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git next
config: x86_64-allyesconfig (attached as .config)
compiler: clang version 11.0.0 (https://github.com/llvm/llvm-project 2388a096e7865c043e83ece4e26654bd3d1a20d5)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install x86_64 cross compiling tool for clang build
        # apt-get install binutils-x86-64-linux-gnu
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=x86_64 

If you fix the issue, kindly add the following tag as appropriate
Reported-by: kbuild test robot <lkp@intel.com>

All warnings (new ones prefixed by >>, old ones prefixed by <<):

>> drivers/acpi/apei/ghes.c:210:5: warning: no previous prototype for function 'ghes_gdata_pool_init' [-Wmissing-prototypes]
int ghes_gdata_pool_init(void)
^
drivers/acpi/apei/ghes.c:210:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
int ghes_gdata_pool_init(void)
^
static
1 warning generated.

vim +/ghes_gdata_pool_init +210 drivers/acpi/apei/ghes.c

   209	
 > 210	int ghes_gdata_pool_init(void)
   211	{
   212		unsigned long addr, len;
   213		int rc;
   214	
   215		ghes_gdata_pool = gen_pool_create(GHES_GDATA_POOL_MIN_ALLOC_ORDER, -1);
   216		if (!ghes_gdata_pool)
   217			return -ENOMEM;
   218	
   219		if (ghes_gdata_pool_size_request < GHES_GDATA_POOL_MIN_SIZE)
   220			ghes_gdata_pool_size_request = GHES_GDATA_POOL_MIN_SIZE;
   221	
   222		len = ghes_gdata_pool_size_request;
   223		addr = (unsigned long)vmalloc(PAGE_ALIGN(len));
   224		if (!addr)
   225			goto err_pool_alloc;
   226	
   227		vmalloc_sync_mappings();
   228	
   229		rc = gen_pool_add(ghes_gdata_pool, addr, PAGE_ALIGN(len), -1);
   230		if (rc)
   231			goto err_pool_add;
   232	
   233		return 0;
   234	
   235	err_pool_add:
   236		vfree((void *)addr);
   237	
   238	err_pool_alloc:
   239		gen_pool_destroy(ghes_gdata_pool);
   240	
   241		return -ENOMEM;
   242	}
   243	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: config.gz --]
[-- Type: application/gzip, Size: 73483 bytes --]

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2020-06-01  1:08 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-05-29 20:04 [PATCH RESEND v8 1/2] ACPI / APEI: Add support to notify the vendor specific HW errors Shiju Jose
2020-06-01  0:59 ` kbuild test robot
2020-06-01  0:59   ` kbuild test robot

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.