linuxppc-dev.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/3] Add bad pmem bad blocks to bad range
@ 2019-08-14  8:24 Santosh Sivaraj
  2019-08-14  8:24 ` [PATCH 1/3] powerpc/mce: Add MCE notification chain Santosh Sivaraj
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Santosh Sivaraj @ 2019-08-14  8:24 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: Aneesh Kumar K.V, Oliver O'Halloran, Chandan Rajendra,
	Reza Arbab, Mahesh Salgaonkar

This series, which should be based on top of the still un-merged
"powerpc: implement machine check safe memcpy" series, adds support
to add the bad blocks which generated an MCE to the NVDIMM bad blocks.
The next access of the same memory will be blocked by the NVDIMM layer
itself.

Santosh Sivaraj (3):
  powerpc/mce: Add MCE notification chain
  of_pmem: Add memory ranges which took a mce to bad range
  papr/scm: Add bad memory ranges to nvdimm bad ranges

 arch/powerpc/include/asm/mce.h            |   3 +
 arch/powerpc/kernel/mce.c                 |  15 +++
 arch/powerpc/platforms/pseries/papr_scm.c |  65 ++++++++++++
 drivers/nvdimm/of_pmem.c                  | 122 ++++++++++++++++++----
 4 files changed, 186 insertions(+), 19 deletions(-)

-- 
2.21.0


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 1/3] powerpc/mce: Add MCE notification chain
  2019-08-14  8:24 [PATCH 0/3] Add bad pmem bad blocks to bad range Santosh Sivaraj
@ 2019-08-14  8:24 ` Santosh Sivaraj
  2019-08-14  8:24 ` [PATCH 2/3] of_pmem: Add memory ranges which took a mce to bad range Santosh Sivaraj
  2019-08-14  8:24 ` [PATCH 3/3] papr/scm: Add bad memory ranges to nvdimm bad ranges Santosh Sivaraj
  2 siblings, 0 replies; 7+ messages in thread
From: Santosh Sivaraj @ 2019-08-14  8:24 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: Aneesh Kumar K.V, Oliver O'Halloran, Chandan Rajendra,
	Reza Arbab, Mahesh Salgaonkar

This is needed to report bad blocks for persistent memory.

Signed-off-by: Santosh Sivaraj <santosh@fossix.org>
---
 arch/powerpc/include/asm/mce.h |  3 +++
 arch/powerpc/kernel/mce.c      | 15 +++++++++++++++
 2 files changed, 18 insertions(+)

diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
index e1931c8c2743..b1c6363f924c 100644
--- a/arch/powerpc/include/asm/mce.h
+++ b/arch/powerpc/include/asm/mce.h
@@ -212,6 +212,9 @@ extern void machine_check_queue_event(void);
 extern void machine_check_print_event_info(struct machine_check_event *evt,
 					   bool user_mode, bool in_guest);
 unsigned long addr_to_phys(struct pt_regs *regs, unsigned long addr);
+int mce_register_notifier(struct notifier_block *nb);
+int mce_unregister_notifier(struct notifier_block *nb);
+
 #ifdef CONFIG_PPC_BOOK3S_64
 void flush_and_reload_slb(void);
 #endif /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index ec4b3e1087be..a78210ca6cd9 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -47,6 +47,20 @@ static struct irq_work mce_ue_event_irq_work = {
 
 DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);
 
+static BLOCKING_NOTIFIER_HEAD(mce_notifier_list);
+
+int mce_register_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_register(&mce_notifier_list, nb);
+}
+EXPORT_SYMBOL_GPL(mce_register_notifier);
+
+int mce_unregister_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_unregister(&mce_notifier_list, nb);
+}
+EXPORT_SYMBOL_GPL(mce_unregister_notifier);
+
 static void mce_set_error_info(struct machine_check_event *mce,
 			       struct mce_error_info *mce_err)
 {
@@ -263,6 +277,7 @@ static void machine_process_ue_event(struct work_struct *work)
 	while (__this_cpu_read(mce_ue_count) > 0) {
 		index = __this_cpu_read(mce_ue_count) - 1;
 		evt = this_cpu_ptr(&mce_ue_event_queue[index]);
+		blocking_notifier_call_chain(&mce_notifier_list, 0, evt);
 #ifdef CONFIG_MEMORY_FAILURE
 		/*
 		 * This should probably queued elsewhere, but
-- 
2.21.0


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 2/3] of_pmem: Add memory ranges which took a mce to bad range
  2019-08-14  8:24 [PATCH 0/3] Add bad pmem bad blocks to bad range Santosh Sivaraj
  2019-08-14  8:24 ` [PATCH 1/3] powerpc/mce: Add MCE notification chain Santosh Sivaraj
@ 2019-08-14  8:24 ` Santosh Sivaraj
  2019-08-14  8:24 ` [PATCH 3/3] papr/scm: Add bad memory ranges to nvdimm bad ranges Santosh Sivaraj
  2 siblings, 0 replies; 7+ messages in thread
From: Santosh Sivaraj @ 2019-08-14  8:24 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: Aneesh Kumar K.V, Oliver O'Halloran, Chandan Rajendra,
	Reza Arbab, Mahesh Salgaonkar

Subscribe to the MCE notification and add the physical address which
generated a memory error to nvdimm bad range.

Signed-off-by: Santosh Sivaraj <santosh@fossix.org>
---
 drivers/nvdimm/of_pmem.c | 122 +++++++++++++++++++++++++++++++++------
 1 file changed, 103 insertions(+), 19 deletions(-)

diff --git a/drivers/nvdimm/of_pmem.c b/drivers/nvdimm/of_pmem.c
index a0c8dcfa0bf9..828dbfe44ca6 100644
--- a/drivers/nvdimm/of_pmem.c
+++ b/drivers/nvdimm/of_pmem.c
@@ -8,6 +8,9 @@
 #include <linux/module.h>
 #include <linux/ioport.h>
 #include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/nd.h>
+#include <asm/mce.h>
 
 static const struct attribute_group *region_attr_groups[] = {
 	&nd_region_attribute_group,
@@ -25,11 +28,77 @@ struct of_pmem_private {
 	struct nvdimm_bus *bus;
 };
 
+struct of_pmem_region {
+	struct of_pmem_private *priv;
+	struct nd_region_desc *region_desc;
+	struct nd_region *region;
+	struct list_head list;
+};
+
+LIST_HEAD(pmem_regions);
+DEFINE_MUTEX(pmem_region_lock);
+
+static int handle_mce_ue(struct notifier_block *nb, unsigned long val,
+			 void *data)
+{
+	struct machine_check_event *evt = data;
+	struct of_pmem_region *pmem_region;
+	u64 phys_addr;
+
+	if (evt->error_type != MCE_ERROR_TYPE_UE)
+		return NOTIFY_DONE;
+
+	if (list_empty(&pmem_regions))
+		return NOTIFY_DONE;
+
+	phys_addr = evt->u.ue_error.physical_address +
+		(evt->u.ue_error.effective_address & ~PAGE_MASK);
+
+	if (!evt->u.ue_error.physical_address_provided ||
+	    !is_zone_device_page(pfn_to_page(phys_addr >> PAGE_SHIFT)))
+		return NOTIFY_DONE;
+
+	mutex_lock(&pmem_region_lock);
+	list_for_each_entry(pmem_region, &pmem_regions, list) {
+		struct resource *res = pmem_region->region_desc->res;
+		u64 aligned_addr;
+
+		if (res->start > phys_addr)
+			continue;
+
+		if (res->end < phys_addr)
+			continue;
+
+		aligned_addr = ALIGN_DOWN(phys_addr, L1_CACHE_BYTES);
+		pr_debug("Add memory range (0x%llx -- 0x%llx) as bad range\n",
+			 aligned_addr, aligned_addr + L1_CACHE_BYTES);
+
+		if (nvdimm_bus_add_badrange(pmem_region->priv->bus,
+					     aligned_addr, L1_CACHE_BYTES))
+			pr_warn("Failed to add bad range (0x%llx -- 0x%llx)\n",
+				aligned_addr, aligned_addr + L1_CACHE_BYTES);
+
+		nvdimm_region_notify(pmem_region->region,
+				     NVDIMM_REVALIDATE_POISON);
+
+		break;
+	}
+	mutex_unlock(&pmem_region_lock);
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block mce_ue_nb = {
+	.notifier_call = handle_mce_ue
+};
+
 static int of_pmem_region_probe(struct platform_device *pdev)
 {
 	struct of_pmem_private *priv;
 	struct device_node *np;
 	struct nvdimm_bus *bus;
+	struct of_pmem_region *pmem_region;
+	struct nd_region_desc *ndr_desc;
 	bool is_volatile;
 	int i;
 
@@ -58,34 +127,49 @@ static int of_pmem_region_probe(struct platform_device *pdev)
 			is_volatile ? "volatile" : "non-volatile",  np);
 
 	for (i = 0; i < pdev->num_resources; i++) {
-		struct nd_region_desc ndr_desc;
 		struct nd_region *region;
 
-		/*
-		 * NB: libnvdimm copies the data from ndr_desc into it's own
-		 * structures so passing a stack pointer is fine.
-		 */
-		memset(&ndr_desc, 0, sizeof(ndr_desc));
-		ndr_desc.attr_groups = region_attr_groups;
-		ndr_desc.numa_node = dev_to_node(&pdev->dev);
-		ndr_desc.target_node = ndr_desc.numa_node;
-		ndr_desc.res = &pdev->resource[i];
-		ndr_desc.of_node = np;
-		set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags);
+		ndr_desc = kzalloc(sizeof(struct nd_region_desc), GFP_KERNEL);
+		if (!ndr_desc) {
+			nvdimm_bus_unregister(priv->bus);
+			kfree(priv);
+			return -ENOMEM;
+		}
+
+		ndr_desc->attr_groups = region_attr_groups;
+		ndr_desc->numa_node = dev_to_node(&pdev->dev);
+		ndr_desc->target_node = ndr_desc->numa_node;
+		ndr_desc->res = &pdev->resource[i];
+		ndr_desc->of_node = np;
+		set_bit(ND_REGION_PAGEMAP, &ndr_desc->flags);
 
 		if (is_volatile)
-			region = nvdimm_volatile_region_create(bus, &ndr_desc);
+			region = nvdimm_volatile_region_create(bus, ndr_desc);
 		else
-			region = nvdimm_pmem_region_create(bus, &ndr_desc);
+			region = nvdimm_pmem_region_create(bus, ndr_desc);
 
 		if (!region)
-			dev_warn(&pdev->dev, "Unable to register region %pR from %pOF\n",
-					ndr_desc.res, np);
-		else
-			dev_dbg(&pdev->dev, "Registered region %pR from %pOF\n",
-					ndr_desc.res, np);
+			continue;
+
+		dev_dbg(&pdev->dev, "Registered region %pR from %pOF\n",
+			ndr_desc->res, np);
+
+		pmem_region = kzalloc(sizeof(struct of_pmem_region),
+				      GFP_KERNEL);
+		if (!pmem_region)
+			continue;
+
+		pmem_region->region_desc = ndr_desc;
+		pmem_region->region = region;
+		pmem_region->priv = priv;
+
+		/* Save regions registered for use by the notification code */
+		mutex_lock(&pmem_region_lock);
+		list_add_tail(&pmem_region->list, &pmem_regions);
+		mutex_unlock(&pmem_region_lock);
 	}
 
+	mce_register_notifier(&mce_ue_nb);
 	return 0;
 }
 
-- 
2.21.0


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 3/3] papr/scm: Add bad memory ranges to nvdimm bad ranges
  2019-08-14  8:24 [PATCH 0/3] Add bad pmem bad blocks to bad range Santosh Sivaraj
  2019-08-14  8:24 ` [PATCH 1/3] powerpc/mce: Add MCE notification chain Santosh Sivaraj
  2019-08-14  8:24 ` [PATCH 2/3] of_pmem: Add memory ranges which took a mce to bad range Santosh Sivaraj
@ 2019-08-14  8:24 ` Santosh Sivaraj
  2019-08-15 13:11   ` Oliver O'Halloran
  2 siblings, 1 reply; 7+ messages in thread
From: Santosh Sivaraj @ 2019-08-14  8:24 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: Aneesh Kumar K.V, Oliver O'Halloran, Chandan Rajendra,
	Reza Arbab, Mahesh Salgaonkar

Subscribe to the MCE notification and add the physical address which
generated a memory error to nvdimm bad range.

Signed-off-by: Santosh Sivaraj <santosh@fossix.org>
---
 arch/powerpc/platforms/pseries/papr_scm.c | 65 +++++++++++++++++++++++
 1 file changed, 65 insertions(+)

diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
index a5ac371a3f06..4d25c98a9835 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -12,6 +12,8 @@
 #include <linux/libnvdimm.h>
 #include <linux/platform_device.h>
 #include <linux/delay.h>
+#include <linux/nd.h>
+#include <asm/mce.h>
 
 #include <asm/plpar_wrappers.h>
 
@@ -39,8 +41,12 @@ struct papr_scm_priv {
 	struct resource res;
 	struct nd_region *region;
 	struct nd_interleave_set nd_set;
+	struct list_head list;
 };
 
+LIST_HEAD(papr_nd_regions);
+DEFINE_MUTEX(papr_ndr_lock);
+
 static int drc_pmem_bind(struct papr_scm_priv *p)
 {
 	unsigned long ret[PLPAR_HCALL_BUFSIZE];
@@ -364,6 +370,10 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
 		dev_info(dev, "Region registered with target node %d and online node %d",
 			 target_nid, online_nid);
 
+	mutex_lock(&papr_ndr_lock);
+	list_add_tail(&p->list, &papr_nd_regions);
+	mutex_unlock(&papr_ndr_lock);
+
 	return 0;
 
 err:	nvdimm_bus_unregister(p->bus);
@@ -371,6 +381,60 @@ err:	nvdimm_bus_unregister(p->bus);
 	return -ENXIO;
 }
 
+static int handle_mce_ue(struct notifier_block *nb, unsigned long val,
+			 void *data)
+{
+	struct machine_check_event *evt = data;
+	struct papr_scm_priv *p;
+	u64 phys_addr;
+
+	if (evt->error_type != MCE_ERROR_TYPE_UE)
+		return NOTIFY_DONE;
+
+	if (list_empty(&papr_nd_regions))
+		return NOTIFY_DONE;
+
+	phys_addr = evt->u.ue_error.physical_address +
+		(evt->u.ue_error.effective_address & ~PAGE_MASK);
+
+	if (!evt->u.ue_error.physical_address_provided ||
+	    !is_zone_device_page(pfn_to_page(phys_addr >> PAGE_SHIFT)))
+		return NOTIFY_DONE;
+
+	mutex_lock(&papr_ndr_lock);
+	list_for_each_entry(p, &papr_nd_regions, list) {
+		struct resource res = p->res;
+		u64 aligned_addr;
+
+		if (res.start > phys_addr)
+			continue;
+
+		if (res.end < phys_addr)
+			continue;
+
+		aligned_addr = ALIGN_DOWN(phys_addr, L1_CACHE_BYTES);
+		pr_debug("Add memory range (0x%llx -- 0x%llx) as bad range\n",
+			 aligned_addr, aligned_addr + L1_CACHE_BYTES);
+
+		if (nvdimm_bus_add_badrange(p->bus,
+					    aligned_addr, L1_CACHE_BYTES))
+			pr_warn("Failed to add bad range (0x%llx -- 0x%llx)\n",
+				aligned_addr, aligned_addr + L1_CACHE_BYTES);
+
+		nvdimm_region_notify(p->region,
+				     NVDIMM_REVALIDATE_POISON);
+
+		break;
+	}
+	mutex_unlock(&papr_ndr_lock);
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block mce_ue_nb = {
+	.notifier_call = handle_mce_ue
+};
+
 static int papr_scm_probe(struct platform_device *pdev)
 {
 	struct device_node *dn = pdev->dev.of_node;
@@ -456,6 +520,7 @@ static int papr_scm_probe(struct platform_device *pdev)
 		goto err2;
 
 	platform_set_drvdata(pdev, p);
+	mce_register_notifier(&mce_ue_nb);
 
 	return 0;
 
-- 
2.21.0


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH 3/3] papr/scm: Add bad memory ranges to nvdimm bad ranges
  2019-08-14  8:24 ` [PATCH 3/3] papr/scm: Add bad memory ranges to nvdimm bad ranges Santosh Sivaraj
@ 2019-08-15 13:11   ` Oliver O'Halloran
  2019-08-15 14:00     ` Santosh Sivaraj
  0 siblings, 1 reply; 7+ messages in thread
From: Oliver O'Halloran @ 2019-08-15 13:11 UTC (permalink / raw)
  To: Santosh Sivaraj
  Cc: Reza Arbab, Aneesh Kumar K.V, Chandan Rajendra, linuxppc-dev,
	Mahesh J Salgaonkar

On Wed, Aug 14, 2019 at 6:25 PM Santosh Sivaraj <santosh@fossix.org> wrote:
>
> Subscribe to the MCE notification and add the physical address which
> generated a memory error to nvdimm bad range.
>
> Signed-off-by: Santosh Sivaraj <santosh@fossix.org>
> ---
>  arch/powerpc/platforms/pseries/papr_scm.c | 65 +++++++++++++++++++++++
>  1 file changed, 65 insertions(+)
>
> diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
> index a5ac371a3f06..4d25c98a9835 100644
> --- a/arch/powerpc/platforms/pseries/papr_scm.c
> +++ b/arch/powerpc/platforms/pseries/papr_scm.c
> @@ -12,6 +12,8 @@
>  #include <linux/libnvdimm.h>
>  #include <linux/platform_device.h>
>  #include <linux/delay.h>
> +#include <linux/nd.h>
> +#include <asm/mce.h>
>
>  #include <asm/plpar_wrappers.h>
>
> @@ -39,8 +41,12 @@ struct papr_scm_priv {
>         struct resource res;
>         struct nd_region *region;
>         struct nd_interleave_set nd_set;
> +       struct list_head list;

list is not a meaningful name. call it something more descriptive.

>  };
>
> +LIST_HEAD(papr_nd_regions);
> +DEFINE_MUTEX(papr_ndr_lock);

Should this be a mutex or a spinlock? I don't know what context the
mce notifier is called from, but if it's not sleepable then a mutex
will cause problems. Did you test this with lockdep enabled?

> +
>  static int drc_pmem_bind(struct papr_scm_priv *p)
>  {
>         unsigned long ret[PLPAR_HCALL_BUFSIZE];
> @@ -364,6 +370,10 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
>                 dev_info(dev, "Region registered with target node %d and online node %d",
>                          target_nid, online_nid);
>
> +       mutex_lock(&papr_ndr_lock);
> +       list_add_tail(&p->list, &papr_nd_regions);
> +       mutex_unlock(&papr_ndr_lock);
> +

Where's the matching remove when we unbind the driver?

>         return 0;
>
>  err:   nvdimm_bus_unregister(p->bus);
> @@ -371,6 +381,60 @@ err:       nvdimm_bus_unregister(p->bus);
>         return -ENXIO;
>  }
>
> +static int handle_mce_ue(struct notifier_block *nb, unsigned long val,
> +                        void *data)
> +{
> +       struct machine_check_event *evt = data;
> +       struct papr_scm_priv *p;
> +       u64 phys_addr;
> +
> +       if (evt->error_type != MCE_ERROR_TYPE_UE)
> +               return NOTIFY_DONE;
> +
> +       if (list_empty(&papr_nd_regions))
> +               return NOTIFY_DONE;
> +
> +       phys_addr = evt->u.ue_error.physical_address +
> +               (evt->u.ue_error.effective_address & ~PAGE_MASK);

Wait what? Why is physical_address page aligned, but effective_address
not? Not a problem with this patch, but still, what the hell?

> +       if (!evt->u.ue_error.physical_address_provided ||
> +           !is_zone_device_page(pfn_to_page(phys_addr >> PAGE_SHIFT)))
> +               return NOTIFY_DONE;
> +
> +       mutex_lock(&papr_ndr_lock);
> +       list_for_each_entry(p, &papr_nd_regions, list) {
> +               struct resource res = p->res;
> +               u64 aligned_addr;
> +

> +               if (res.start > phys_addr)
> +                       continue;
> +
> +               if (res.end < phys_addr)
> +                       continue;

surely there's a helper for this

> +
> +               aligned_addr = ALIGN_DOWN(phys_addr, L1_CACHE_BYTES);
> +               pr_debug("Add memory range (0x%llx -- 0x%llx) as bad range\n",
> +                        aligned_addr, aligned_addr + L1_CACHE_BYTES);
> +
> +               if (nvdimm_bus_add_badrange(p->bus,
> +                                           aligned_addr, L1_CACHE_BYTES))
> +                       pr_warn("Failed to add bad range (0x%llx -- 0x%llx)\n",
> +                               aligned_addr, aligned_addr + L1_CACHE_BYTES);
> +
> +               nvdimm_region_notify(p->region,
> +                                    NVDIMM_REVALIDATE_POISON);
> +
> +               break;

nit: you can avoid stacking indetation levels by breaking out of the
loop as soon as you've found the region you're looking for.

> +       }
> +       mutex_unlock(&papr_ndr_lock);
> +
> +       return NOTIFY_OK;
> +}
> +
> +static struct notifier_block mce_ue_nb = {
> +       .notifier_call = handle_mce_ue
> +};
> +
>  static int papr_scm_probe(struct platform_device *pdev)
>  {
>         struct device_node *dn = pdev->dev.of_node;
> @@ -456,6 +520,7 @@ static int papr_scm_probe(struct platform_device *pdev)
>                 goto err2;
>
>         platform_set_drvdata(pdev, p);
> +       mce_register_notifier(&mce_ue_nb);

Either get rid of the global region list and have a notifier block in
each device's driver private data, or keep the global list and
register the notifier in module_init(). Re-registering the notifier
each time a seperate device is probed seems very sketchy.

>         return 0;
>
> --
> 2.21.0
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 3/3] papr/scm: Add bad memory ranges to nvdimm bad ranges
  2019-08-15 13:11   ` Oliver O'Halloran
@ 2019-08-15 14:00     ` Santosh Sivaraj
  0 siblings, 0 replies; 7+ messages in thread
From: Santosh Sivaraj @ 2019-08-15 14:00 UTC (permalink / raw)
  To: Oliver O'Halloran
  Cc: Reza Arbab, Aneesh Kumar K.V, Chandan Rajendra, linuxppc-dev,
	Mahesh J Salgaonkar

"Oliver O'Halloran" <oohall@gmail.com> writes:

> On Wed, Aug 14, 2019 at 6:25 PM Santosh Sivaraj <santosh@fossix.org> wrote:
>>
>> Subscribe to the MCE notification and add the physical address which
>> generated a memory error to nvdimm bad range.
>>
>> Signed-off-by: Santosh Sivaraj <santosh@fossix.org>
>> ---
>>  arch/powerpc/platforms/pseries/papr_scm.c | 65 +++++++++++++++++++++++
>>  1 file changed, 65 insertions(+)
>>
>> diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
>> index a5ac371a3f06..4d25c98a9835 100644
>> --- a/arch/powerpc/platforms/pseries/papr_scm.c
>> +++ b/arch/powerpc/platforms/pseries/papr_scm.c
>> @@ -12,6 +12,8 @@
>>  #include <linux/libnvdimm.h>
>>  #include <linux/platform_device.h>
>>  #include <linux/delay.h>
>> +#include <linux/nd.h>
>> +#include <asm/mce.h>
>>
>>  #include <asm/plpar_wrappers.h>
>>
>> @@ -39,8 +41,12 @@ struct papr_scm_priv {
>>         struct resource res;
>>         struct nd_region *region;
>>         struct nd_interleave_set nd_set;
>> +       struct list_head list;
>
> list is not a meaningful name. call it something more descriptive.
>
>>  };
>>
>> +LIST_HEAD(papr_nd_regions);
>> +DEFINE_MUTEX(papr_ndr_lock);
>
> Should this be a mutex or a spinlock? I don't know what context the
> mce notifier is called from, but if it's not sleepable then a mutex
> will cause problems. Did you test this with lockdep enabled?

This would be a mutex, we are called from a blocking notifier.

>
>> +
>>  static int drc_pmem_bind(struct papr_scm_priv *p)
>>  {
>>         unsigned long ret[PLPAR_HCALL_BUFSIZE];
>> @@ -364,6 +370,10 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
>>                 dev_info(dev, "Region registered with target node %d and online node %d",
>>                          target_nid, online_nid);
>>
>> +       mutex_lock(&papr_ndr_lock);
>> +       list_add_tail(&p->list, &papr_nd_regions);
>> +       mutex_unlock(&papr_ndr_lock);
>> +
>
> Where's the matching remove when we unbind the driver?

Missed it completely. Will fix it.

>
>>         return 0;
>>pp
>>  err:   nvdimm_bus_unregister(p->bus);
>> @@ -371,6 +381,60 @@ err:       nvdimm_bus_unregister(p->bus);
>>         return -ENXIO;
>>  }
>>
>> +static int handle_mce_ue(struct notifier_block *nb, unsigned long val,
>> +                        void *data)
>> +{
>> +       struct machine_check_event *evt = data;
>> +       struct papr_scm_priv *p;
>> +       u64 phys_addr;
>> +
>> +       if (evt->error_type != MCE_ERROR_TYPE_UE)
>> +               return NOTIFY_DONE;
>> +
>> +       if (list_empty(&papr_nd_regions))
>> +               return NOTIFY_DONE;
>> +
>> +       phys_addr = evt->u.ue_error.physical_address +
>> +               (evt->u.ue_error.effective_address & ~PAGE_MASK);
>
> Wait what? Why is physical_address page aligned, but effective_address
> not? Not a problem with this patch, but still, what the hell?

Not sure why, but its the way now. I can see if I can update it if it makes
sense in a later patch.

>
>> +       if (!evt->u.ue_error.physical_address_provided ||
>> +           !is_zone_device_page(pfn_to_page(phys_addr >> PAGE_SHIFT)))
>> +               return NOTIFY_DONE;
>> +
>> +       mutex_lock(&papr_ndr_lock);
>> +       list_for_each_entry(p, &papr_nd_regions, list) {
>> +               struct resource res = p->res;
>> +               u64 aligned_addr;
>> +
>
>> +               if (res.start > phys_addr)
>> +                       continue;
>> +
>> +               if (res.end < phys_addr)
>> +                       continue;
>
> surely there's a helper for this
>
>> +
>> +               aligned_addr = ALIGN_DOWN(phys_addr, L1_CACHE_BYTES);
>> +               pr_debug("Add memory range (0x%llx -- 0x%llx) as bad range\n",
>> +                        aligned_addr, aligned_addr + L1_CACHE_BYTES);
>> +
>> +               if (nvdimm_bus_add_badrange(p->bus,
>> +                                           aligned_addr, L1_CACHE_BYTES))
>> +                       pr_warn("Failed to add bad range (0x%llx -- 0x%llx)\n",
>> +                               aligned_addr, aligned_addr + L1_CACHE_BYTES);
>> +
>> +               nvdimm_region_notify(p->region,
>> +                                    NVDIMM_REVALIDATE_POISON);
>> +
>> +               break;
>
> nit: you can avoid stacking indetation levels by breaking out of the
> loop as soon as you've found the region you're looking for.

True.

>
>> +       }
>> +       mutex_unlock(&papr_ndr_lock);
>> +
>> +       return NOTIFY_OK;
>> +}
>> +
>> +static struct notifier_block mce_ue_nb = {
>> +       .notifier_call = handle_mce_ue
>> +};
>> +
>>  static int papr_scm_probe(struct platform_device *pdev)
>>  {
>>         struct device_node *dn = pdev->dev.of_node;
>> @@ -456,6 +520,7 @@ static int papr_scm_probe(struct platform_device *pdev)
>>                 goto err2;
>>
>>         platform_set_drvdata(pdev, p);
>> +       mce_register_notifier(&mce_ue_nb);
>
> Either get rid of the global region list and have a notifier block in
> each device's driver private data, or keep the global list and
> register the notifier in module_init(). Re-registering the notifier
> each time a seperate device is probed seems very sketchy.

Registering the notifier in the init is simpler. I will change it.

>
>>         return 0;
>>
>> --
>> 2.21.0
p>>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 3/3] papr/scm: Add bad memory ranges to nvdimm bad ranges
  2019-08-20  2:30 [PATCH 0/3] Add bad pmem bad blocks to bad range Santosh Sivaraj
@ 2019-08-20  2:30 ` Santosh Sivaraj
  0 siblings, 0 replies; 7+ messages in thread
From: Santosh Sivaraj @ 2019-08-20  2:30 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Chandan Rajendra, Oliver O'Halloran, Mahesh Salgaonkar

Subscribe to the MCE notification and add the physical address which
generated a memory error to nvdimm bad range.

Signed-off-by: Santosh Sivaraj <santosh@fossix.org>
---
 arch/powerpc/platforms/pseries/papr_scm.c | 86 ++++++++++++++++++++++-
 1 file changed, 85 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
index a5ac371a3f06..e38f7febc5d9 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -12,6 +12,8 @@
 #include <linux/libnvdimm.h>
 #include <linux/platform_device.h>
 #include <linux/delay.h>
+#include <linux/nd.h>
+#include <asm/mce.h>
 
 #include <asm/plpar_wrappers.h>
 
@@ -39,8 +41,12 @@ struct papr_scm_priv {
 	struct resource res;
 	struct nd_region *region;
 	struct nd_interleave_set nd_set;
+	struct list_head region_list;
 };
 
+LIST_HEAD(papr_nd_regions);
+DEFINE_MUTEX(papr_ndr_lock);
+
 static int drc_pmem_bind(struct papr_scm_priv *p)
 {
 	unsigned long ret[PLPAR_HCALL_BUFSIZE];
@@ -364,6 +370,10 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
 		dev_info(dev, "Region registered with target node %d and online node %d",
 			 target_nid, online_nid);
 
+	mutex_lock(&papr_ndr_lock);
+	list_add_tail(&p->region_list, &papr_nd_regions);
+	mutex_unlock(&papr_ndr_lock);
+
 	return 0;
 
 err:	nvdimm_bus_unregister(p->bus);
@@ -371,6 +381,57 @@ err:	nvdimm_bus_unregister(p->bus);
 	return -ENXIO;
 }
 
+static int handle_mce_ue(struct notifier_block *nb, unsigned long val,
+			 void *data)
+{
+	struct machine_check_event *evt = data;
+	struct papr_scm_priv *p;
+	u64 phys_addr, aligned_addr;
+	bool found = false;
+
+	if (evt->error_type != MCE_ERROR_TYPE_UE)
+		return NOTIFY_DONE;
+
+	if (list_empty(&papr_nd_regions))
+		return NOTIFY_DONE;
+
+	phys_addr = evt->u.ue_error.physical_address +
+		(evt->u.ue_error.effective_address & ~PAGE_MASK);
+
+	if (!evt->u.ue_error.physical_address_provided ||
+	    !is_zone_device_page(pfn_to_page(phys_addr >> PAGE_SHIFT)))
+		return NOTIFY_DONE;
+
+	mutex_lock(&papr_ndr_lock);
+	list_for_each_entry(p, &papr_nd_regions, region_list) {
+		struct resource res = p->res;
+
+		if (phys_addr >= res.start && phys_addr <= res.end) {
+			found = true;
+			break;
+		}
+	}
+	mutex_unlock(&papr_ndr_lock);
+
+	if (!found)
+		return NOTIFY_DONE;
+
+	aligned_addr = ALIGN_DOWN(phys_addr, L1_CACHE_BYTES);
+	if (nvdimm_bus_add_badrange(p->bus, aligned_addr, L1_CACHE_BYTES))
+		return NOTIFY_DONE;
+
+	pr_debug("Add memory range (0x%llx -- 0x%llx) as bad range\n",
+		 aligned_addr, aligned_addr + L1_CACHE_BYTES);
+
+	nvdimm_region_notify(p->region, NVDIMM_REVALIDATE_POISON);
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block mce_ue_nb = {
+	.notifier_call = handle_mce_ue
+};
+
 static int papr_scm_probe(struct platform_device *pdev)
 {
 	struct device_node *dn = pdev->dev.of_node;
@@ -456,6 +517,7 @@ static int papr_scm_probe(struct platform_device *pdev)
 		goto err2;
 
 	platform_set_drvdata(pdev, p);
+	mce_register_notifier(&mce_ue_nb);
 
 	return 0;
 
@@ -468,6 +530,10 @@ static int papr_scm_remove(struct platform_device *pdev)
 {
 	struct papr_scm_priv *p = platform_get_drvdata(pdev);
 
+	mutex_lock(&papr_ndr_lock);
+	list_del(&(p->region_list));
+	mutex_unlock(&papr_ndr_lock);
+
 	nvdimm_bus_unregister(p->bus);
 	drc_pmem_unbind(p);
 	kfree(p);
@@ -490,7 +556,25 @@ static struct platform_driver papr_scm_driver = {
 	},
 };
 
-module_platform_driver(papr_scm_driver);
+static int __init papr_scm_init(void)
+{
+	int ret;
+
+	ret = platform_driver_register(&papr_scm_driver);
+	if (!ret)
+		mce_register_notifier(&mce_ue_nb);
+
+	return ret;
+}
+module_init(papr_scm_init);
+
+static void __exit papr_scm_exit(void)
+{
+	mce_unregister_notifier(&mce_ue_nb);
+	platform_driver_unregister(&papr_scm_driver);
+}
+module_exit(papr_scm_exit);
+
 MODULE_DEVICE_TABLE(of, papr_scm_match);
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("IBM Corporation");
-- 
2.21.0


^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2019-08-20  2:53 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-08-14  8:24 [PATCH 0/3] Add bad pmem bad blocks to bad range Santosh Sivaraj
2019-08-14  8:24 ` [PATCH 1/3] powerpc/mce: Add MCE notification chain Santosh Sivaraj
2019-08-14  8:24 ` [PATCH 2/3] of_pmem: Add memory ranges which took a mce to bad range Santosh Sivaraj
2019-08-14  8:24 ` [PATCH 3/3] papr/scm: Add bad memory ranges to nvdimm bad ranges Santosh Sivaraj
2019-08-15 13:11   ` Oliver O'Halloran
2019-08-15 14:00     ` Santosh Sivaraj
2019-08-20  2:30 [PATCH 0/3] Add bad pmem bad blocks to bad range Santosh Sivaraj
2019-08-20  2:30 ` [PATCH 3/3] papr/scm: Add bad memory ranges to nvdimm bad ranges Santosh Sivaraj

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).