* [PATCH 1/3] powerpc/mce: Add MCE notification chain
2019-08-14 8:24 [PATCH 0/3] Add bad pmem bad blocks to bad range Santosh Sivaraj
@ 2019-08-14 8:24 ` Santosh Sivaraj
2019-08-14 8:24 ` [PATCH 2/3] of_pmem: Add memory ranges which took a mce to bad range Santosh Sivaraj
2019-08-14 8:24 ` [PATCH 3/3] papr/scm: Add bad memory ranges to nvdimm bad ranges Santosh Sivaraj
2 siblings, 0 replies; 7+ messages in thread
From: Santosh Sivaraj @ 2019-08-14 8:24 UTC (permalink / raw)
To: linuxppc-dev
Cc: Aneesh Kumar K.V, Oliver O'Halloran, Chandan Rajendra,
Reza Arbab, Mahesh Salgaonkar
This is needed to report bad blocks for persistent memory.
Signed-off-by: Santosh Sivaraj <santosh@fossix.org>
---
arch/powerpc/include/asm/mce.h | 3 +++
arch/powerpc/kernel/mce.c | 15 +++++++++++++++
2 files changed, 18 insertions(+)
diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
index e1931c8c2743..b1c6363f924c 100644
--- a/arch/powerpc/include/asm/mce.h
+++ b/arch/powerpc/include/asm/mce.h
@@ -212,6 +212,9 @@ extern void machine_check_queue_event(void);
extern void machine_check_print_event_info(struct machine_check_event *evt,
bool user_mode, bool in_guest);
unsigned long addr_to_phys(struct pt_regs *regs, unsigned long addr);
+int mce_register_notifier(struct notifier_block *nb);
+int mce_unregister_notifier(struct notifier_block *nb);
+
#ifdef CONFIG_PPC_BOOK3S_64
void flush_and_reload_slb(void);
#endif /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index ec4b3e1087be..a78210ca6cd9 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -47,6 +47,20 @@ static struct irq_work mce_ue_event_irq_work = {
DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);
+static BLOCKING_NOTIFIER_HEAD(mce_notifier_list);
+
+int mce_register_notifier(struct notifier_block *nb)
+{
+ return blocking_notifier_chain_register(&mce_notifier_list, nb);
+}
+EXPORT_SYMBOL_GPL(mce_register_notifier);
+
+int mce_unregister_notifier(struct notifier_block *nb)
+{
+ return blocking_notifier_chain_unregister(&mce_notifier_list, nb);
+}
+EXPORT_SYMBOL_GPL(mce_unregister_notifier);
+
static void mce_set_error_info(struct machine_check_event *mce,
struct mce_error_info *mce_err)
{
@@ -263,6 +277,7 @@ static void machine_process_ue_event(struct work_struct *work)
while (__this_cpu_read(mce_ue_count) > 0) {
index = __this_cpu_read(mce_ue_count) - 1;
evt = this_cpu_ptr(&mce_ue_event_queue[index]);
+ blocking_notifier_call_chain(&mce_notifier_list, 0, evt);
#ifdef CONFIG_MEMORY_FAILURE
/*
* This should probably queued elsewhere, but
--
2.21.0
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 2/3] of_pmem: Add memory ranges which took a mce to bad range
2019-08-14 8:24 [PATCH 0/3] Add bad pmem bad blocks to bad range Santosh Sivaraj
2019-08-14 8:24 ` [PATCH 1/3] powerpc/mce: Add MCE notification chain Santosh Sivaraj
@ 2019-08-14 8:24 ` Santosh Sivaraj
2019-08-14 8:24 ` [PATCH 3/3] papr/scm: Add bad memory ranges to nvdimm bad ranges Santosh Sivaraj
2 siblings, 0 replies; 7+ messages in thread
From: Santosh Sivaraj @ 2019-08-14 8:24 UTC (permalink / raw)
To: linuxppc-dev
Cc: Aneesh Kumar K.V, Oliver O'Halloran, Chandan Rajendra,
Reza Arbab, Mahesh Salgaonkar
Subscribe to the MCE notification and add the physical address which
generated a memory error to nvdimm bad range.
Signed-off-by: Santosh Sivaraj <santosh@fossix.org>
---
drivers/nvdimm/of_pmem.c | 122 +++++++++++++++++++++++++++++++++------
1 file changed, 103 insertions(+), 19 deletions(-)
diff --git a/drivers/nvdimm/of_pmem.c b/drivers/nvdimm/of_pmem.c
index a0c8dcfa0bf9..828dbfe44ca6 100644
--- a/drivers/nvdimm/of_pmem.c
+++ b/drivers/nvdimm/of_pmem.c
@@ -8,6 +8,9 @@
#include <linux/module.h>
#include <linux/ioport.h>
#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/nd.h>
+#include <asm/mce.h>
static const struct attribute_group *region_attr_groups[] = {
&nd_region_attribute_group,
@@ -25,11 +28,77 @@ struct of_pmem_private {
struct nvdimm_bus *bus;
};
+struct of_pmem_region {
+ struct of_pmem_private *priv;
+ struct nd_region_desc *region_desc;
+ struct nd_region *region;
+ struct list_head list;
+};
+
+LIST_HEAD(pmem_regions);
+DEFINE_MUTEX(pmem_region_lock);
+
+static int handle_mce_ue(struct notifier_block *nb, unsigned long val,
+ void *data)
+{
+ struct machine_check_event *evt = data;
+ struct of_pmem_region *pmem_region;
+ u64 phys_addr;
+
+ if (evt->error_type != MCE_ERROR_TYPE_UE)
+ return NOTIFY_DONE;
+
+ if (list_empty(&pmem_regions))
+ return NOTIFY_DONE;
+
+ phys_addr = evt->u.ue_error.physical_address +
+ (evt->u.ue_error.effective_address & ~PAGE_MASK);
+
+ if (!evt->u.ue_error.physical_address_provided ||
+ !is_zone_device_page(pfn_to_page(phys_addr >> PAGE_SHIFT)))
+ return NOTIFY_DONE;
+
+ mutex_lock(&pmem_region_lock);
+ list_for_each_entry(pmem_region, &pmem_regions, list) {
+ struct resource *res = pmem_region->region_desc->res;
+ u64 aligned_addr;
+
+ if (res->start > phys_addr)
+ continue;
+
+ if (res->end < phys_addr)
+ continue;
+
+ aligned_addr = ALIGN_DOWN(phys_addr, L1_CACHE_BYTES);
+ pr_debug("Add memory range (0x%llx -- 0x%llx) as bad range\n",
+ aligned_addr, aligned_addr + L1_CACHE_BYTES);
+
+ if (nvdimm_bus_add_badrange(pmem_region->priv->bus,
+ aligned_addr, L1_CACHE_BYTES))
+ pr_warn("Failed to add bad range (0x%llx -- 0x%llx)\n",
+ aligned_addr, aligned_addr + L1_CACHE_BYTES);
+
+ nvdimm_region_notify(pmem_region->region,
+ NVDIMM_REVALIDATE_POISON);
+
+ break;
+ }
+ mutex_unlock(&pmem_region_lock);
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block mce_ue_nb = {
+ .notifier_call = handle_mce_ue
+};
+
static int of_pmem_region_probe(struct platform_device *pdev)
{
struct of_pmem_private *priv;
struct device_node *np;
struct nvdimm_bus *bus;
+ struct of_pmem_region *pmem_region;
+ struct nd_region_desc *ndr_desc;
bool is_volatile;
int i;
@@ -58,34 +127,49 @@ static int of_pmem_region_probe(struct platform_device *pdev)
is_volatile ? "volatile" : "non-volatile", np);
for (i = 0; i < pdev->num_resources; i++) {
- struct nd_region_desc ndr_desc;
struct nd_region *region;
- /*
- * NB: libnvdimm copies the data from ndr_desc into it's own
- * structures so passing a stack pointer is fine.
- */
- memset(&ndr_desc, 0, sizeof(ndr_desc));
- ndr_desc.attr_groups = region_attr_groups;
- ndr_desc.numa_node = dev_to_node(&pdev->dev);
- ndr_desc.target_node = ndr_desc.numa_node;
- ndr_desc.res = &pdev->resource[i];
- ndr_desc.of_node = np;
- set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags);
+ ndr_desc = kzalloc(sizeof(struct nd_region_desc), GFP_KERNEL);
+ if (!ndr_desc) {
+ nvdimm_bus_unregister(priv->bus);
+ kfree(priv);
+ return -ENOMEM;
+ }
+
+ ndr_desc->attr_groups = region_attr_groups;
+ ndr_desc->numa_node = dev_to_node(&pdev->dev);
+ ndr_desc->target_node = ndr_desc->numa_node;
+ ndr_desc->res = &pdev->resource[i];
+ ndr_desc->of_node = np;
+ set_bit(ND_REGION_PAGEMAP, &ndr_desc->flags);
if (is_volatile)
- region = nvdimm_volatile_region_create(bus, &ndr_desc);
+ region = nvdimm_volatile_region_create(bus, ndr_desc);
else
- region = nvdimm_pmem_region_create(bus, &ndr_desc);
+ region = nvdimm_pmem_region_create(bus, ndr_desc);
if (!region)
- dev_warn(&pdev->dev, "Unable to register region %pR from %pOF\n",
- ndr_desc.res, np);
- else
- dev_dbg(&pdev->dev, "Registered region %pR from %pOF\n",
- ndr_desc.res, np);
+ continue;
+
+ dev_dbg(&pdev->dev, "Registered region %pR from %pOF\n",
+ ndr_desc->res, np);
+
+ pmem_region = kzalloc(sizeof(struct of_pmem_region),
+ GFP_KERNEL);
+ if (!pmem_region)
+ continue;
+
+ pmem_region->region_desc = ndr_desc;
+ pmem_region->region = region;
+ pmem_region->priv = priv;
+
+ /* Save regions registered for use by the notification code */
+ mutex_lock(&pmem_region_lock);
+ list_add_tail(&pmem_region->list, &pmem_regions);
+ mutex_unlock(&pmem_region_lock);
}
+ mce_register_notifier(&mce_ue_nb);
return 0;
}
--
2.21.0
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 3/3] papr/scm: Add bad memory ranges to nvdimm bad ranges
2019-08-14 8:24 [PATCH 0/3] Add bad pmem bad blocks to bad range Santosh Sivaraj
2019-08-14 8:24 ` [PATCH 1/3] powerpc/mce: Add MCE notification chain Santosh Sivaraj
2019-08-14 8:24 ` [PATCH 2/3] of_pmem: Add memory ranges which took a mce to bad range Santosh Sivaraj
@ 2019-08-14 8:24 ` Santosh Sivaraj
2019-08-15 13:11 ` Oliver O'Halloran
2 siblings, 1 reply; 7+ messages in thread
From: Santosh Sivaraj @ 2019-08-14 8:24 UTC (permalink / raw)
To: linuxppc-dev
Cc: Aneesh Kumar K.V, Oliver O'Halloran, Chandan Rajendra,
Reza Arbab, Mahesh Salgaonkar
Subscribe to the MCE notification and add the physical address which
generated a memory error to nvdimm bad range.
Signed-off-by: Santosh Sivaraj <santosh@fossix.org>
---
arch/powerpc/platforms/pseries/papr_scm.c | 65 +++++++++++++++++++++++
1 file changed, 65 insertions(+)
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
index a5ac371a3f06..4d25c98a9835 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -12,6 +12,8 @@
#include <linux/libnvdimm.h>
#include <linux/platform_device.h>
#include <linux/delay.h>
+#include <linux/nd.h>
+#include <asm/mce.h>
#include <asm/plpar_wrappers.h>
@@ -39,8 +41,12 @@ struct papr_scm_priv {
struct resource res;
struct nd_region *region;
struct nd_interleave_set nd_set;
+ struct list_head list;
};
+LIST_HEAD(papr_nd_regions);
+DEFINE_MUTEX(papr_ndr_lock);
+
static int drc_pmem_bind(struct papr_scm_priv *p)
{
unsigned long ret[PLPAR_HCALL_BUFSIZE];
@@ -364,6 +370,10 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
dev_info(dev, "Region registered with target node %d and online node %d",
target_nid, online_nid);
+ mutex_lock(&papr_ndr_lock);
+ list_add_tail(&p->list, &papr_nd_regions);
+ mutex_unlock(&papr_ndr_lock);
+
return 0;
err: nvdimm_bus_unregister(p->bus);
@@ -371,6 +381,60 @@ err: nvdimm_bus_unregister(p->bus);
return -ENXIO;
}
+static int handle_mce_ue(struct notifier_block *nb, unsigned long val,
+ void *data)
+{
+ struct machine_check_event *evt = data;
+ struct papr_scm_priv *p;
+ u64 phys_addr;
+
+ if (evt->error_type != MCE_ERROR_TYPE_UE)
+ return NOTIFY_DONE;
+
+ if (list_empty(&papr_nd_regions))
+ return NOTIFY_DONE;
+
+ phys_addr = evt->u.ue_error.physical_address +
+ (evt->u.ue_error.effective_address & ~PAGE_MASK);
+
+ if (!evt->u.ue_error.physical_address_provided ||
+ !is_zone_device_page(pfn_to_page(phys_addr >> PAGE_SHIFT)))
+ return NOTIFY_DONE;
+
+ mutex_lock(&papr_ndr_lock);
+ list_for_each_entry(p, &papr_nd_regions, list) {
+ struct resource res = p->res;
+ u64 aligned_addr;
+
+ if (res.start > phys_addr)
+ continue;
+
+ if (res.end < phys_addr)
+ continue;
+
+ aligned_addr = ALIGN_DOWN(phys_addr, L1_CACHE_BYTES);
+ pr_debug("Add memory range (0x%llx -- 0x%llx) as bad range\n",
+ aligned_addr, aligned_addr + L1_CACHE_BYTES);
+
+ if (nvdimm_bus_add_badrange(p->bus,
+ aligned_addr, L1_CACHE_BYTES))
+ pr_warn("Failed to add bad range (0x%llx -- 0x%llx)\n",
+ aligned_addr, aligned_addr + L1_CACHE_BYTES);
+
+ nvdimm_region_notify(p->region,
+ NVDIMM_REVALIDATE_POISON);
+
+ break;
+ }
+ mutex_unlock(&papr_ndr_lock);
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block mce_ue_nb = {
+ .notifier_call = handle_mce_ue
+};
+
static int papr_scm_probe(struct platform_device *pdev)
{
struct device_node *dn = pdev->dev.of_node;
@@ -456,6 +520,7 @@ static int papr_scm_probe(struct platform_device *pdev)
goto err2;
platform_set_drvdata(pdev, p);
+ mce_register_notifier(&mce_ue_nb);
return 0;
--
2.21.0
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH 3/3] papr/scm: Add bad memory ranges to nvdimm bad ranges
2019-08-14 8:24 ` [PATCH 3/3] papr/scm: Add bad memory ranges to nvdimm bad ranges Santosh Sivaraj
@ 2019-08-15 13:11 ` Oliver O'Halloran
2019-08-15 14:00 ` Santosh Sivaraj
0 siblings, 1 reply; 7+ messages in thread
From: Oliver O'Halloran @ 2019-08-15 13:11 UTC (permalink / raw)
To: Santosh Sivaraj
Cc: Reza Arbab, Aneesh Kumar K.V, Chandan Rajendra, linuxppc-dev,
Mahesh J Salgaonkar
On Wed, Aug 14, 2019 at 6:25 PM Santosh Sivaraj <santosh@fossix.org> wrote:
>
> Subscribe to the MCE notification and add the physical address which
> generated a memory error to nvdimm bad range.
>
> Signed-off-by: Santosh Sivaraj <santosh@fossix.org>
> ---
> arch/powerpc/platforms/pseries/papr_scm.c | 65 +++++++++++++++++++++++
> 1 file changed, 65 insertions(+)
>
> diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
> index a5ac371a3f06..4d25c98a9835 100644
> --- a/arch/powerpc/platforms/pseries/papr_scm.c
> +++ b/arch/powerpc/platforms/pseries/papr_scm.c
> @@ -12,6 +12,8 @@
> #include <linux/libnvdimm.h>
> #include <linux/platform_device.h>
> #include <linux/delay.h>
> +#include <linux/nd.h>
> +#include <asm/mce.h>
>
> #include <asm/plpar_wrappers.h>
>
> @@ -39,8 +41,12 @@ struct papr_scm_priv {
> struct resource res;
> struct nd_region *region;
> struct nd_interleave_set nd_set;
> + struct list_head list;
list is not a meaningful name. call it something more descriptive.
> };
>
> +LIST_HEAD(papr_nd_regions);
> +DEFINE_MUTEX(papr_ndr_lock);
Should this be a mutex or a spinlock? I don't know what context the
mce notifier is called from, but if it's not sleepable then a mutex
will cause problems. Did you test this with lockdep enabled?
> +
> static int drc_pmem_bind(struct papr_scm_priv *p)
> {
> unsigned long ret[PLPAR_HCALL_BUFSIZE];
> @@ -364,6 +370,10 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
> dev_info(dev, "Region registered with target node %d and online node %d",
> target_nid, online_nid);
>
> + mutex_lock(&papr_ndr_lock);
> + list_add_tail(&p->list, &papr_nd_regions);
> + mutex_unlock(&papr_ndr_lock);
> +
Where's the matching remove when we unbind the driver?
> return 0;
>
> err: nvdimm_bus_unregister(p->bus);
> @@ -371,6 +381,60 @@ err: nvdimm_bus_unregister(p->bus);
> return -ENXIO;
> }
>
> +static int handle_mce_ue(struct notifier_block *nb, unsigned long val,
> + void *data)
> +{
> + struct machine_check_event *evt = data;
> + struct papr_scm_priv *p;
> + u64 phys_addr;
> +
> + if (evt->error_type != MCE_ERROR_TYPE_UE)
> + return NOTIFY_DONE;
> +
> + if (list_empty(&papr_nd_regions))
> + return NOTIFY_DONE;
> +
> + phys_addr = evt->u.ue_error.physical_address +
> + (evt->u.ue_error.effective_address & ~PAGE_MASK);
Wait what? Why is physical_address page aligned, but effective_address
not? Not a problem with this patch, but still, what the hell?
> + if (!evt->u.ue_error.physical_address_provided ||
> + !is_zone_device_page(pfn_to_page(phys_addr >> PAGE_SHIFT)))
> + return NOTIFY_DONE;
> +
> + mutex_lock(&papr_ndr_lock);
> + list_for_each_entry(p, &papr_nd_regions, list) {
> + struct resource res = p->res;
> + u64 aligned_addr;
> +
> + if (res.start > phys_addr)
> + continue;
> +
> + if (res.end < phys_addr)
> + continue;
surely there's a helper for this
> +
> + aligned_addr = ALIGN_DOWN(phys_addr, L1_CACHE_BYTES);
> + pr_debug("Add memory range (0x%llx -- 0x%llx) as bad range\n",
> + aligned_addr, aligned_addr + L1_CACHE_BYTES);
> +
> + if (nvdimm_bus_add_badrange(p->bus,
> + aligned_addr, L1_CACHE_BYTES))
> + pr_warn("Failed to add bad range (0x%llx -- 0x%llx)\n",
> + aligned_addr, aligned_addr + L1_CACHE_BYTES);
> +
> + nvdimm_region_notify(p->region,
> + NVDIMM_REVALIDATE_POISON);
> +
> + break;
nit: you can avoid stacking indetation levels by breaking out of the
loop as soon as you've found the region you're looking for.
> + }
> + mutex_unlock(&papr_ndr_lock);
> +
> + return NOTIFY_OK;
> +}
> +
> +static struct notifier_block mce_ue_nb = {
> + .notifier_call = handle_mce_ue
> +};
> +
> static int papr_scm_probe(struct platform_device *pdev)
> {
> struct device_node *dn = pdev->dev.of_node;
> @@ -456,6 +520,7 @@ static int papr_scm_probe(struct platform_device *pdev)
> goto err2;
>
> platform_set_drvdata(pdev, p);
> + mce_register_notifier(&mce_ue_nb);
Either get rid of the global region list and have a notifier block in
each device's driver private data, or keep the global list and
register the notifier in module_init(). Re-registering the notifier
each time a seperate device is probed seems very sketchy.
> return 0;
>
> --
> 2.21.0
>
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 3/3] papr/scm: Add bad memory ranges to nvdimm bad ranges
2019-08-15 13:11 ` Oliver O'Halloran
@ 2019-08-15 14:00 ` Santosh Sivaraj
0 siblings, 0 replies; 7+ messages in thread
From: Santosh Sivaraj @ 2019-08-15 14:00 UTC (permalink / raw)
To: Oliver O'Halloran
Cc: Reza Arbab, Aneesh Kumar K.V, Chandan Rajendra, linuxppc-dev,
Mahesh J Salgaonkar
"Oliver O'Halloran" <oohall@gmail.com> writes:
> On Wed, Aug 14, 2019 at 6:25 PM Santosh Sivaraj <santosh@fossix.org> wrote:
>>
>> Subscribe to the MCE notification and add the physical address which
>> generated a memory error to nvdimm bad range.
>>
>> Signed-off-by: Santosh Sivaraj <santosh@fossix.org>
>> ---
>> arch/powerpc/platforms/pseries/papr_scm.c | 65 +++++++++++++++++++++++
>> 1 file changed, 65 insertions(+)
>>
>> diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
>> index a5ac371a3f06..4d25c98a9835 100644
>> --- a/arch/powerpc/platforms/pseries/papr_scm.c
>> +++ b/arch/powerpc/platforms/pseries/papr_scm.c
>> @@ -12,6 +12,8 @@
>> #include <linux/libnvdimm.h>
>> #include <linux/platform_device.h>
>> #include <linux/delay.h>
>> +#include <linux/nd.h>
>> +#include <asm/mce.h>
>>
>> #include <asm/plpar_wrappers.h>
>>
>> @@ -39,8 +41,12 @@ struct papr_scm_priv {
>> struct resource res;
>> struct nd_region *region;
>> struct nd_interleave_set nd_set;
>> + struct list_head list;
>
> list is not a meaningful name. call it something more descriptive.
>
>> };
>>
>> +LIST_HEAD(papr_nd_regions);
>> +DEFINE_MUTEX(papr_ndr_lock);
>
> Should this be a mutex or a spinlock? I don't know what context the
> mce notifier is called from, but if it's not sleepable then a mutex
> will cause problems. Did you test this with lockdep enabled?
This would be a mutex, we are called from a blocking notifier.
>
>> +
>> static int drc_pmem_bind(struct papr_scm_priv *p)
>> {
>> unsigned long ret[PLPAR_HCALL_BUFSIZE];
>> @@ -364,6 +370,10 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
>> dev_info(dev, "Region registered with target node %d and online node %d",
>> target_nid, online_nid);
>>
>> + mutex_lock(&papr_ndr_lock);
>> + list_add_tail(&p->list, &papr_nd_regions);
>> + mutex_unlock(&papr_ndr_lock);
>> +
>
> Where's the matching remove when we unbind the driver?
Missed it completely. Will fix it.
>
>> return 0;
>>pp
>> err: nvdimm_bus_unregister(p->bus);
>> @@ -371,6 +381,60 @@ err: nvdimm_bus_unregister(p->bus);
>> return -ENXIO;
>> }
>>
>> +static int handle_mce_ue(struct notifier_block *nb, unsigned long val,
>> + void *data)
>> +{
>> + struct machine_check_event *evt = data;
>> + struct papr_scm_priv *p;
>> + u64 phys_addr;
>> +
>> + if (evt->error_type != MCE_ERROR_TYPE_UE)
>> + return NOTIFY_DONE;
>> +
>> + if (list_empty(&papr_nd_regions))
>> + return NOTIFY_DONE;
>> +
>> + phys_addr = evt->u.ue_error.physical_address +
>> + (evt->u.ue_error.effective_address & ~PAGE_MASK);
>
> Wait what? Why is physical_address page aligned, but effective_address
> not? Not a problem with this patch, but still, what the hell?
Not sure why, but its the way now. I can see if I can update it if it makes
sense in a later patch.
>
>> + if (!evt->u.ue_error.physical_address_provided ||
>> + !is_zone_device_page(pfn_to_page(phys_addr >> PAGE_SHIFT)))
>> + return NOTIFY_DONE;
>> +
>> + mutex_lock(&papr_ndr_lock);
>> + list_for_each_entry(p, &papr_nd_regions, list) {
>> + struct resource res = p->res;
>> + u64 aligned_addr;
>> +
>
>> + if (res.start > phys_addr)
>> + continue;
>> +
>> + if (res.end < phys_addr)
>> + continue;
>
> surely there's a helper for this
>
>> +
>> + aligned_addr = ALIGN_DOWN(phys_addr, L1_CACHE_BYTES);
>> + pr_debug("Add memory range (0x%llx -- 0x%llx) as bad range\n",
>> + aligned_addr, aligned_addr + L1_CACHE_BYTES);
>> +
>> + if (nvdimm_bus_add_badrange(p->bus,
>> + aligned_addr, L1_CACHE_BYTES))
>> + pr_warn("Failed to add bad range (0x%llx -- 0x%llx)\n",
>> + aligned_addr, aligned_addr + L1_CACHE_BYTES);
>> +
>> + nvdimm_region_notify(p->region,
>> + NVDIMM_REVALIDATE_POISON);
>> +
>> + break;
>
> nit: you can avoid stacking indetation levels by breaking out of the
> loop as soon as you've found the region you're looking for.
True.
>
>> + }
>> + mutex_unlock(&papr_ndr_lock);
>> +
>> + return NOTIFY_OK;
>> +}
>> +
>> +static struct notifier_block mce_ue_nb = {
>> + .notifier_call = handle_mce_ue
>> +};
>> +
>> static int papr_scm_probe(struct platform_device *pdev)
>> {
>> struct device_node *dn = pdev->dev.of_node;
>> @@ -456,6 +520,7 @@ static int papr_scm_probe(struct platform_device *pdev)
>> goto err2;
>>
>> platform_set_drvdata(pdev, p);
>> + mce_register_notifier(&mce_ue_nb);
>
> Either get rid of the global region list and have a notifier block in
> each device's driver private data, or keep the global list and
> register the notifier in module_init(). Re-registering the notifier
> each time a seperate device is probed seems very sketchy.
Registering the notifier in the init is simpler. I will change it.
>
>> return 0;
>>
>> --
>> 2.21.0
p>>
^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH 3/3] papr/scm: Add bad memory ranges to nvdimm bad ranges
2019-08-20 2:30 [PATCH 0/3] Add bad pmem bad blocks to bad range Santosh Sivaraj
@ 2019-08-20 2:30 ` Santosh Sivaraj
0 siblings, 0 replies; 7+ messages in thread
From: Santosh Sivaraj @ 2019-08-20 2:30 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Chandan Rajendra, Oliver O'Halloran, Mahesh Salgaonkar
Subscribe to the MCE notification and add the physical address which
generated a memory error to nvdimm bad range.
Signed-off-by: Santosh Sivaraj <santosh@fossix.org>
---
arch/powerpc/platforms/pseries/papr_scm.c | 86 ++++++++++++++++++++++-
1 file changed, 85 insertions(+), 1 deletion(-)
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
index a5ac371a3f06..e38f7febc5d9 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -12,6 +12,8 @@
#include <linux/libnvdimm.h>
#include <linux/platform_device.h>
#include <linux/delay.h>
+#include <linux/nd.h>
+#include <asm/mce.h>
#include <asm/plpar_wrappers.h>
@@ -39,8 +41,12 @@ struct papr_scm_priv {
struct resource res;
struct nd_region *region;
struct nd_interleave_set nd_set;
+ struct list_head region_list;
};
+LIST_HEAD(papr_nd_regions);
+DEFINE_MUTEX(papr_ndr_lock);
+
static int drc_pmem_bind(struct papr_scm_priv *p)
{
unsigned long ret[PLPAR_HCALL_BUFSIZE];
@@ -364,6 +370,10 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
dev_info(dev, "Region registered with target node %d and online node %d",
target_nid, online_nid);
+ mutex_lock(&papr_ndr_lock);
+ list_add_tail(&p->region_list, &papr_nd_regions);
+ mutex_unlock(&papr_ndr_lock);
+
return 0;
err: nvdimm_bus_unregister(p->bus);
@@ -371,6 +381,57 @@ err: nvdimm_bus_unregister(p->bus);
return -ENXIO;
}
+static int handle_mce_ue(struct notifier_block *nb, unsigned long val,
+ void *data)
+{
+ struct machine_check_event *evt = data;
+ struct papr_scm_priv *p;
+ u64 phys_addr, aligned_addr;
+ bool found = false;
+
+ if (evt->error_type != MCE_ERROR_TYPE_UE)
+ return NOTIFY_DONE;
+
+ if (list_empty(&papr_nd_regions))
+ return NOTIFY_DONE;
+
+ phys_addr = evt->u.ue_error.physical_address +
+ (evt->u.ue_error.effective_address & ~PAGE_MASK);
+
+ if (!evt->u.ue_error.physical_address_provided ||
+ !is_zone_device_page(pfn_to_page(phys_addr >> PAGE_SHIFT)))
+ return NOTIFY_DONE;
+
+ mutex_lock(&papr_ndr_lock);
+ list_for_each_entry(p, &papr_nd_regions, region_list) {
+ struct resource res = p->res;
+
+ if (phys_addr >= res.start && phys_addr <= res.end) {
+ found = true;
+ break;
+ }
+ }
+ mutex_unlock(&papr_ndr_lock);
+
+ if (!found)
+ return NOTIFY_DONE;
+
+ aligned_addr = ALIGN_DOWN(phys_addr, L1_CACHE_BYTES);
+ if (nvdimm_bus_add_badrange(p->bus, aligned_addr, L1_CACHE_BYTES))
+ return NOTIFY_DONE;
+
+ pr_debug("Add memory range (0x%llx -- 0x%llx) as bad range\n",
+ aligned_addr, aligned_addr + L1_CACHE_BYTES);
+
+ nvdimm_region_notify(p->region, NVDIMM_REVALIDATE_POISON);
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block mce_ue_nb = {
+ .notifier_call = handle_mce_ue
+};
+
static int papr_scm_probe(struct platform_device *pdev)
{
struct device_node *dn = pdev->dev.of_node;
@@ -456,6 +517,7 @@ static int papr_scm_probe(struct platform_device *pdev)
goto err2;
platform_set_drvdata(pdev, p);
+ mce_register_notifier(&mce_ue_nb);
return 0;
@@ -468,6 +530,10 @@ static int papr_scm_remove(struct platform_device *pdev)
{
struct papr_scm_priv *p = platform_get_drvdata(pdev);
+ mutex_lock(&papr_ndr_lock);
+ list_del(&(p->region_list));
+ mutex_unlock(&papr_ndr_lock);
+
nvdimm_bus_unregister(p->bus);
drc_pmem_unbind(p);
kfree(p);
@@ -490,7 +556,25 @@ static struct platform_driver papr_scm_driver = {
},
};
-module_platform_driver(papr_scm_driver);
+static int __init papr_scm_init(void)
+{
+ int ret;
+
+ ret = platform_driver_register(&papr_scm_driver);
+ if (!ret)
+ mce_register_notifier(&mce_ue_nb);
+
+ return ret;
+}
+module_init(papr_scm_init);
+
+static void __exit papr_scm_exit(void)
+{
+ mce_unregister_notifier(&mce_ue_nb);
+ platform_driver_unregister(&papr_scm_driver);
+}
+module_exit(papr_scm_exit);
+
MODULE_DEVICE_TABLE(of, papr_scm_match);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("IBM Corporation");
--
2.21.0
^ permalink raw reply related [flat|nested] 7+ messages in thread