* [PATCH 0/3] Add bad pmem bad blocks to bad range @ 2019-08-14 8:24 Santosh Sivaraj 2019-08-14 8:24 ` [PATCH 1/3] powerpc/mce: Add MCE notification chain Santosh Sivaraj ` (2 more replies) 0 siblings, 3 replies; 8+ messages in thread From: Santosh Sivaraj @ 2019-08-14 8:24 UTC (permalink / raw) To: linuxppc-dev Cc: Aneesh Kumar K.V, Oliver O'Halloran, Chandan Rajendra, Reza Arbab, Mahesh Salgaonkar This series, which should be based on top of the still un-merged "powerpc: implement machine check safe memcpy" series, adds support to add the bad blocks which generated an MCE to the NVDIMM bad blocks. The next access of the same memory will be blocked by the NVDIMM layer itself. Santosh Sivaraj (3): powerpc/mce: Add MCE notification chain of_pmem: Add memory ranges which took a mce to bad range papr/scm: Add bad memory ranges to nvdimm bad ranges arch/powerpc/include/asm/mce.h | 3 + arch/powerpc/kernel/mce.c | 15 +++ arch/powerpc/platforms/pseries/papr_scm.c | 65 ++++++++++++ drivers/nvdimm/of_pmem.c | 122 ++++++++++++++++++---- 4 files changed, 186 insertions(+), 19 deletions(-) -- 2.21.0 ^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH 1/3] powerpc/mce: Add MCE notification chain 2019-08-14 8:24 [PATCH 0/3] Add bad pmem bad blocks to bad range Santosh Sivaraj @ 2019-08-14 8:24 ` Santosh Sivaraj 2019-08-14 8:24 ` [PATCH 2/3] of_pmem: Add memory ranges which took a mce to bad range Santosh Sivaraj 2019-08-14 8:24 ` [PATCH 3/3] papr/scm: Add bad memory ranges to nvdimm bad ranges Santosh Sivaraj 2 siblings, 0 replies; 8+ messages in thread From: Santosh Sivaraj @ 2019-08-14 8:24 UTC (permalink / raw) To: linuxppc-dev Cc: Aneesh Kumar K.V, Oliver O'Halloran, Chandan Rajendra, Reza Arbab, Mahesh Salgaonkar This is needed to report bad blocks for persistent memory. Signed-off-by: Santosh Sivaraj <santosh@fossix.org> --- arch/powerpc/include/asm/mce.h | 3 +++ arch/powerpc/kernel/mce.c | 15 +++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h index e1931c8c2743..b1c6363f924c 100644 --- a/arch/powerpc/include/asm/mce.h +++ b/arch/powerpc/include/asm/mce.h @@ -212,6 +212,9 @@ extern void machine_check_queue_event(void); extern void machine_check_print_event_info(struct machine_check_event *evt, bool user_mode, bool in_guest); unsigned long addr_to_phys(struct pt_regs *regs, unsigned long addr); +int mce_register_notifier(struct notifier_block *nb); +int mce_unregister_notifier(struct notifier_block *nb); + #ifdef CONFIG_PPC_BOOK3S_64 void flush_and_reload_slb(void); #endif /* CONFIG_PPC_BOOK3S_64 */ diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index ec4b3e1087be..a78210ca6cd9 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -47,6 +47,20 @@ static struct irq_work mce_ue_event_irq_work = { DECLARE_WORK(mce_ue_event_work, machine_process_ue_event); +static BLOCKING_NOTIFIER_HEAD(mce_notifier_list); + +int mce_register_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&mce_notifier_list, nb); +} +EXPORT_SYMBOL_GPL(mce_register_notifier); + +int 
mce_unregister_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&mce_notifier_list, nb); +} +EXPORT_SYMBOL_GPL(mce_unregister_notifier); + static void mce_set_error_info(struct machine_check_event *mce, struct mce_error_info *mce_err) { @@ -263,6 +277,7 @@ static void machine_process_ue_event(struct work_struct *work) while (__this_cpu_read(mce_ue_count) > 0) { index = __this_cpu_read(mce_ue_count) - 1; evt = this_cpu_ptr(&mce_ue_event_queue[index]); + blocking_notifier_call_chain(&mce_notifier_list, 0, evt); #ifdef CONFIG_MEMORY_FAILURE /* * This should probably queued elsewhere, but -- 2.21.0 ^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 2/3] of_pmem: Add memory ranges which took a mce to bad range 2019-08-14 8:24 [PATCH 0/3] Add bad pmem bad blocks to bad range Santosh Sivaraj 2019-08-14 8:24 ` [PATCH 1/3] powerpc/mce: Add MCE notification chain Santosh Sivaraj @ 2019-08-14 8:24 ` Santosh Sivaraj 2019-08-14 8:24 ` [PATCH 3/3] papr/scm: Add bad memory ranges to nvdimm bad ranges Santosh Sivaraj 2 siblings, 0 replies; 8+ messages in thread From: Santosh Sivaraj @ 2019-08-14 8:24 UTC (permalink / raw) To: linuxppc-dev Cc: Aneesh Kumar K.V, Oliver O'Halloran, Chandan Rajendra, Reza Arbab, Mahesh Salgaonkar Subscribe to the MCE notification and add the physical address which generated a memory error to nvdimm bad range. Signed-off-by: Santosh Sivaraj <santosh@fossix.org> --- drivers/nvdimm/of_pmem.c | 122 +++++++++++++++++++++++++++++++++------ 1 file changed, 103 insertions(+), 19 deletions(-) diff --git a/drivers/nvdimm/of_pmem.c b/drivers/nvdimm/of_pmem.c index a0c8dcfa0bf9..828dbfe44ca6 100644 --- a/drivers/nvdimm/of_pmem.c +++ b/drivers/nvdimm/of_pmem.c @@ -8,6 +8,9 @@ #include <linux/module.h> #include <linux/ioport.h> #include <linux/slab.h> +#include <linux/string.h> +#include <linux/nd.h> +#include <asm/mce.h> static const struct attribute_group *region_attr_groups[] = { &nd_region_attribute_group, @@ -25,11 +28,77 @@ struct of_pmem_private { struct nvdimm_bus *bus; }; +struct of_pmem_region { + struct of_pmem_private *priv; + struct nd_region_desc *region_desc; + struct nd_region *region; + struct list_head list; +}; + +LIST_HEAD(pmem_regions); +DEFINE_MUTEX(pmem_region_lock); + +static int handle_mce_ue(struct notifier_block *nb, unsigned long val, + void *data) +{ + struct machine_check_event *evt = data; + struct of_pmem_region *pmem_region; + u64 phys_addr; + + if (evt->error_type != MCE_ERROR_TYPE_UE) + return NOTIFY_DONE; + + if (list_empty(&pmem_regions)) + return NOTIFY_DONE; + + phys_addr = evt->u.ue_error.physical_address + + (evt->u.ue_error.effective_address & 
~PAGE_MASK); + + if (!evt->u.ue_error.physical_address_provided || + !is_zone_device_page(pfn_to_page(phys_addr >> PAGE_SHIFT))) + return NOTIFY_DONE; + + mutex_lock(&pmem_region_lock); + list_for_each_entry(pmem_region, &pmem_regions, list) { + struct resource *res = pmem_region->region_desc->res; + u64 aligned_addr; + + if (res->start > phys_addr) + continue; + + if (res->end < phys_addr) + continue; + + aligned_addr = ALIGN_DOWN(phys_addr, L1_CACHE_BYTES); + pr_debug("Add memory range (0x%llx -- 0x%llx) as bad range\n", + aligned_addr, aligned_addr + L1_CACHE_BYTES); + + if (nvdimm_bus_add_badrange(pmem_region->priv->bus, + aligned_addr, L1_CACHE_BYTES)) + pr_warn("Failed to add bad range (0x%llx -- 0x%llx)\n", + aligned_addr, aligned_addr + L1_CACHE_BYTES); + + nvdimm_region_notify(pmem_region->region, + NVDIMM_REVALIDATE_POISON); + + break; + } + mutex_unlock(&pmem_region_lock); + + return NOTIFY_OK; +} + +static struct notifier_block mce_ue_nb = { + .notifier_call = handle_mce_ue +}; + static int of_pmem_region_probe(struct platform_device *pdev) { struct of_pmem_private *priv; struct device_node *np; struct nvdimm_bus *bus; + struct of_pmem_region *pmem_region; + struct nd_region_desc *ndr_desc; bool is_volatile; int i; @@ -58,34 +127,49 @@ static int of_pmem_region_probe(struct platform_device *pdev) is_volatile ? "volatile" : "non-volatile", np); for (i = 0; i < pdev->num_resources; i++) { - struct nd_region_desc ndr_desc; struct nd_region *region; - /* - * NB: libnvdimm copies the data from ndr_desc into it's own - * structures so passing a stack pointer is fine. 
- */ - memset(&ndr_desc, 0, sizeof(ndr_desc)); - ndr_desc.attr_groups = region_attr_groups; - ndr_desc.numa_node = dev_to_node(&pdev->dev); - ndr_desc.target_node = ndr_desc.numa_node; - ndr_desc.res = &pdev->resource[i]; - ndr_desc.of_node = np; - set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags); + ndr_desc = kzalloc(sizeof(struct nd_region_desc), GFP_KERNEL); + if (!ndr_desc) { + nvdimm_bus_unregister(priv->bus); + kfree(priv); + return -ENOMEM; + } + + ndr_desc->attr_groups = region_attr_groups; + ndr_desc->numa_node = dev_to_node(&pdev->dev); + ndr_desc->target_node = ndr_desc->numa_node; + ndr_desc->res = &pdev->resource[i]; + ndr_desc->of_node = np; + set_bit(ND_REGION_PAGEMAP, &ndr_desc->flags); if (is_volatile) - region = nvdimm_volatile_region_create(bus, &ndr_desc); + region = nvdimm_volatile_region_create(bus, ndr_desc); else - region = nvdimm_pmem_region_create(bus, &ndr_desc); + region = nvdimm_pmem_region_create(bus, ndr_desc); if (!region) - dev_warn(&pdev->dev, "Unable to register region %pR from %pOF\n", - ndr_desc.res, np); - else - dev_dbg(&pdev->dev, "Registered region %pR from %pOF\n", - ndr_desc.res, np); + continue; + + dev_dbg(&pdev->dev, "Registered region %pR from %pOF\n", + ndr_desc->res, np); + + pmem_region = kzalloc(sizeof(struct of_pmem_region), + GFP_KERNEL); + if (!pmem_region) + continue; + + pmem_region->region_desc = ndr_desc; + pmem_region->region = region; + pmem_region->priv = priv; + + /* Save regions registered for use by the notification code */ + mutex_lock(&pmem_region_lock); + list_add_tail(&pmem_region->list, &pmem_regions); + mutex_unlock(&pmem_region_lock); } + mce_register_notifier(&mce_ue_nb); return 0; } -- 2.21.0 ^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 3/3] papr/scm: Add bad memory ranges to nvdimm bad ranges 2019-08-14 8:24 [PATCH 0/3] Add bad pmem bad blocks to bad range Santosh Sivaraj 2019-08-14 8:24 ` [PATCH 1/3] powerpc/mce: Add MCE notification chain Santosh Sivaraj 2019-08-14 8:24 ` [PATCH 2/3] of_pmem: Add memory ranges which took a mce to bad range Santosh Sivaraj @ 2019-08-14 8:24 ` Santosh Sivaraj 2019-08-15 13:11 ` Oliver O'Halloran 2 siblings, 1 reply; 8+ messages in thread From: Santosh Sivaraj @ 2019-08-14 8:24 UTC (permalink / raw) To: linuxppc-dev Cc: Aneesh Kumar K.V, Oliver O'Halloran, Chandan Rajendra, Reza Arbab, Mahesh Salgaonkar Subscribe to the MCE notification and add the physical address which generated a memory error to nvdimm bad range. Signed-off-by: Santosh Sivaraj <santosh@fossix.org> --- arch/powerpc/platforms/pseries/papr_scm.c | 65 +++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index a5ac371a3f06..4d25c98a9835 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -12,6 +12,8 @@ #include <linux/libnvdimm.h> #include <linux/platform_device.h> #include <linux/delay.h> +#include <linux/nd.h> +#include <asm/mce.h> #include <asm/plpar_wrappers.h> @@ -39,8 +41,12 @@ struct papr_scm_priv { struct resource res; struct nd_region *region; struct nd_interleave_set nd_set; + struct list_head list; }; +LIST_HEAD(papr_nd_regions); +DEFINE_MUTEX(papr_ndr_lock); + static int drc_pmem_bind(struct papr_scm_priv *p) { unsigned long ret[PLPAR_HCALL_BUFSIZE]; @@ -364,6 +370,10 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p) dev_info(dev, "Region registered with target node %d and online node %d", target_nid, online_nid); + mutex_lock(&papr_ndr_lock); + list_add_tail(&p->list, &papr_nd_regions); + mutex_unlock(&papr_ndr_lock); + return 0; err: nvdimm_bus_unregister(p->bus); @@ -371,6 +381,60 @@ err: 
nvdimm_bus_unregister(p->bus); return -ENXIO; } +static int handle_mce_ue(struct notifier_block *nb, unsigned long val, + void *data) +{ + struct machine_check_event *evt = data; + struct papr_scm_priv *p; + u64 phys_addr; + + if (evt->error_type != MCE_ERROR_TYPE_UE) + return NOTIFY_DONE; + + if (list_empty(&papr_nd_regions)) + return NOTIFY_DONE; + + phys_addr = evt->u.ue_error.physical_address + + (evt->u.ue_error.effective_address & ~PAGE_MASK); + + if (!evt->u.ue_error.physical_address_provided || + !is_zone_device_page(pfn_to_page(phys_addr >> PAGE_SHIFT))) + return NOTIFY_DONE; + + mutex_lock(&papr_ndr_lock); + list_for_each_entry(p, &papr_nd_regions, list) { + struct resource res = p->res; + u64 aligned_addr; + + if (res.start > phys_addr) + continue; + + if (res.end < phys_addr) + continue; + + aligned_addr = ALIGN_DOWN(phys_addr, L1_CACHE_BYTES); + pr_debug("Add memory range (0x%llx -- 0x%llx) as bad range\n", + aligned_addr, aligned_addr + L1_CACHE_BYTES); + + if (nvdimm_bus_add_badrange(p->bus, + aligned_addr, L1_CACHE_BYTES)) + pr_warn("Failed to add bad range (0x%llx -- 0x%llx)\n", + aligned_addr, aligned_addr + L1_CACHE_BYTES); + + nvdimm_region_notify(p->region, + NVDIMM_REVALIDATE_POISON); + + break; + } + mutex_unlock(&papr_ndr_lock); + + return NOTIFY_OK; +} + +static struct notifier_block mce_ue_nb = { + .notifier_call = handle_mce_ue +}; + static int papr_scm_probe(struct platform_device *pdev) { struct device_node *dn = pdev->dev.of_node; @@ -456,6 +520,7 @@ static int papr_scm_probe(struct platform_device *pdev) goto err2; platform_set_drvdata(pdev, p); + mce_register_notifier(&mce_ue_nb); return 0; -- 2.21.0 ^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH 3/3] papr/scm: Add bad memory ranges to nvdimm bad ranges 2019-08-14 8:24 ` [PATCH 3/3] papr/scm: Add bad memory ranges to nvdimm bad ranges Santosh Sivaraj @ 2019-08-15 13:11 ` Oliver O'Halloran 2019-08-15 14:00 ` Santosh Sivaraj 0 siblings, 1 reply; 8+ messages in thread From: Oliver O'Halloran @ 2019-08-15 13:11 UTC (permalink / raw) To: Santosh Sivaraj Cc: Reza Arbab, Aneesh Kumar K.V, Chandan Rajendra, linuxppc-dev, Mahesh J Salgaonkar On Wed, Aug 14, 2019 at 6:25 PM Santosh Sivaraj <santosh@fossix.org> wrote: > > Subscribe to the MCE notification and add the physical address which > generated a memory error to nvdimm bad range. > > Signed-off-by: Santosh Sivaraj <santosh@fossix.org> > --- > arch/powerpc/platforms/pseries/papr_scm.c | 65 +++++++++++++++++++++++ > 1 file changed, 65 insertions(+) > > diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c > index a5ac371a3f06..4d25c98a9835 100644 > --- a/arch/powerpc/platforms/pseries/papr_scm.c > +++ b/arch/powerpc/platforms/pseries/papr_scm.c > @@ -12,6 +12,8 @@ > #include <linux/libnvdimm.h> > #include <linux/platform_device.h> > #include <linux/delay.h> > +#include <linux/nd.h> > +#include <asm/mce.h> > > #include <asm/plpar_wrappers.h> > > @@ -39,8 +41,12 @@ struct papr_scm_priv { > struct resource res; > struct nd_region *region; > struct nd_interleave_set nd_set; > + struct list_head list; list is not a meaningful name. call it something more descriptive. > }; > > +LIST_HEAD(papr_nd_regions); > +DEFINE_MUTEX(papr_ndr_lock); Should this be a mutex or a spinlock? I don't know what context the mce notifier is called from, but if it's not sleepable then a mutex will cause problems. Did you test this with lockdep enabled? 
> + > static int drc_pmem_bind(struct papr_scm_priv *p) > { > unsigned long ret[PLPAR_HCALL_BUFSIZE]; > @@ -364,6 +370,10 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p) > dev_info(dev, "Region registered with target node %d and online node %d", > target_nid, online_nid); > > + mutex_lock(&papr_ndr_lock); > + list_add_tail(&p->list, &papr_nd_regions); > + mutex_unlock(&papr_ndr_lock); > + Where's the matching remove when we unbind the driver? > return 0; > > err: nvdimm_bus_unregister(p->bus); > @@ -371,6 +381,60 @@ err: nvdimm_bus_unregister(p->bus); > return -ENXIO; > } > > +static int handle_mce_ue(struct notifier_block *nb, unsigned long val, > + void *data) > +{ > + struct machine_check_event *evt = data; > + struct papr_scm_priv *p; > + u64 phys_addr; > + > + if (evt->error_type != MCE_ERROR_TYPE_UE) > + return NOTIFY_DONE; > + > + if (list_empty(&papr_nd_regions)) > + return NOTIFY_DONE; > + > + phys_addr = evt->u.ue_error.physical_address + > + (evt->u.ue_error.effective_address & ~PAGE_MASK); Wait what? Why is physical_address page aligned, but effective_address not? Not a problem with this patch, but still, what the hell? 
> + if (!evt->u.ue_error.physical_address_provided || > + !is_zone_device_page(pfn_to_page(phys_addr >> PAGE_SHIFT))) > + return NOTIFY_DONE; > + > + mutex_lock(&papr_ndr_lock); > + list_for_each_entry(p, &papr_nd_regions, list) { > + struct resource res = p->res; > + u64 aligned_addr; > + > + if (res.start > phys_addr) > + continue; > + > + if (res.end < phys_addr) > + continue; surely there's a helper for this > + > + aligned_addr = ALIGN_DOWN(phys_addr, L1_CACHE_BYTES); > + pr_debug("Add memory range (0x%llx -- 0x%llx) as bad range\n", > + aligned_addr, aligned_addr + L1_CACHE_BYTES); > + > + if (nvdimm_bus_add_badrange(p->bus, > + aligned_addr, L1_CACHE_BYTES)) > + pr_warn("Failed to add bad range (0x%llx -- 0x%llx)\n", > + aligned_addr, aligned_addr + L1_CACHE_BYTES); > + > + nvdimm_region_notify(p->region, > + NVDIMM_REVALIDATE_POISON); > + > + break; nit: you can avoid stacking indentation levels by breaking out of the loop as soon as you've found the region you're looking for. > + } > + mutex_unlock(&papr_ndr_lock); > + > + return NOTIFY_OK; > +} > + > +static struct notifier_block mce_ue_nb = { > + .notifier_call = handle_mce_ue > +}; > + > static int papr_scm_probe(struct platform_device *pdev) > { > struct device_node *dn = pdev->dev.of_node; > @@ -456,6 +520,7 @@ static int papr_scm_probe(struct platform_device *pdev) > goto err2; > > platform_set_drvdata(pdev, p); > + mce_register_notifier(&mce_ue_nb); Either get rid of the global region list and have a notifier block in each device's driver private data, or keep the global list and register the notifier in module_init(). Re-registering the notifier each time a separate device is probed seems very sketchy. > return 0; > > -- > 2.21.0 > ^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 3/3] papr/scm: Add bad memory ranges to nvdimm bad ranges 2019-08-15 13:11 ` Oliver O'Halloran @ 2019-08-15 14:00 ` Santosh Sivaraj 0 siblings, 0 replies; 8+ messages in thread From: Santosh Sivaraj @ 2019-08-15 14:00 UTC (permalink / raw) To: Oliver O'Halloran Cc: Reza Arbab, Aneesh Kumar K.V, Chandan Rajendra, linuxppc-dev, Mahesh J Salgaonkar "Oliver O'Halloran" <oohall@gmail.com> writes: > On Wed, Aug 14, 2019 at 6:25 PM Santosh Sivaraj <santosh@fossix.org> wrote: >> >> Subscribe to the MCE notification and add the physical address which >> generated a memory error to nvdimm bad range. >> >> Signed-off-by: Santosh Sivaraj <santosh@fossix.org> >> --- >> arch/powerpc/platforms/pseries/papr_scm.c | 65 +++++++++++++++++++++++ >> 1 file changed, 65 insertions(+) >> >> diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c >> index a5ac371a3f06..4d25c98a9835 100644 >> --- a/arch/powerpc/platforms/pseries/papr_scm.c >> +++ b/arch/powerpc/platforms/pseries/papr_scm.c >> @@ -12,6 +12,8 @@ >> #include <linux/libnvdimm.h> >> #include <linux/platform_device.h> >> #include <linux/delay.h> >> +#include <linux/nd.h> >> +#include <asm/mce.h> >> >> #include <asm/plpar_wrappers.h> >> >> @@ -39,8 +41,12 @@ struct papr_scm_priv { >> struct resource res; >> struct nd_region *region; >> struct nd_interleave_set nd_set; >> + struct list_head list; > > list is not a meaningful name. call it something more descriptive. > >> }; >> >> +LIST_HEAD(papr_nd_regions); >> +DEFINE_MUTEX(papr_ndr_lock); > > Should this be a mutex or a spinlock? I don't know what context the > mce notifier is called from, but if it's not sleepable then a mutex > will cause problems. Did you test this with lockdep enabled? This would be a mutex, we are called from a blocking notifier. 
> >> + >> static int drc_pmem_bind(struct papr_scm_priv *p) >> { >> unsigned long ret[PLPAR_HCALL_BUFSIZE]; >> @@ -364,6 +370,10 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p) >> dev_info(dev, "Region registered with target node %d and online node %d", >> target_nid, online_nid); >> >> + mutex_lock(&papr_ndr_lock); >> + list_add_tail(&p->list, &papr_nd_regions); >> + mutex_unlock(&papr_ndr_lock); >> + > > Where's the matching remove when we unbind the driver? Missed it completely. Will fix it. > >> return 0; >>pp >> err: nvdimm_bus_unregister(p->bus); >> @@ -371,6 +381,60 @@ err: nvdimm_bus_unregister(p->bus); >> return -ENXIO; >> } >> >> +static int handle_mce_ue(struct notifier_block *nb, unsigned long val, >> + void *data) >> +{ >> + struct machine_check_event *evt = data; >> + struct papr_scm_priv *p; >> + u64 phys_addr; >> + >> + if (evt->error_type != MCE_ERROR_TYPE_UE) >> + return NOTIFY_DONE; >> + >> + if (list_empty(&papr_nd_regions)) >> + return NOTIFY_DONE; >> + >> + phys_addr = evt->u.ue_error.physical_address + >> + (evt->u.ue_error.effective_address & ~PAGE_MASK); > > Wait what? Why is physical_address page aligned, but effective_address > not? Not a problem with this patch, but still, what the hell? Not sure why, but its the way now. I can see if I can update it if it makes sense in a later patch. 
> >> + if (!evt->u.ue_error.physical_address_provided || >> + !is_zone_device_page(pfn_to_page(phys_addr >> PAGE_SHIFT))) >> + return NOTIFY_DONE; >> + >> + mutex_lock(&papr_ndr_lock); >> + list_for_each_entry(p, &papr_nd_regions, list) { >> + struct resource res = p->res; >> + u64 aligned_addr; >> + > >> + if (res.start > phys_addr) >> + continue; >> + >> + if (res.end < phys_addr) >> + continue; > > surely there's a helper for this > >> + >> + aligned_addr = ALIGN_DOWN(phys_addr, L1_CACHE_BYTES); >> + pr_debug("Add memory range (0x%llx -- 0x%llx) as bad range\n", >> + aligned_addr, aligned_addr + L1_CACHE_BYTES); >> + >> + if (nvdimm_bus_add_badrange(p->bus, >> + aligned_addr, L1_CACHE_BYTES)) >> + pr_warn("Failed to add bad range (0x%llx -- 0x%llx)\n", >> + aligned_addr, aligned_addr + L1_CACHE_BYTES); >> + >> + nvdimm_region_notify(p->region, >> + NVDIMM_REVALIDATE_POISON); >> + >> + break; > > nit: you can avoid stacking indetation levels by breaking out of the > loop as soon as you've found the region you're looking for. True. > >> + } >> + mutex_unlock(&papr_ndr_lock); >> + >> + return NOTIFY_OK; >> +} >> + >> +static struct notifier_block mce_ue_nb = { >> + .notifier_call = handle_mce_ue >> +}; >> + >> static int papr_scm_probe(struct platform_device *pdev) >> { >> struct device_node *dn = pdev->dev.of_node; >> @@ -456,6 +520,7 @@ static int papr_scm_probe(struct platform_device *pdev) >> goto err2; >> >> platform_set_drvdata(pdev, p); >> + mce_register_notifier(&mce_ue_nb); > > Either get rid of the global region list and have a notifier block in > each device's driver private data, or keep the global list and > register the notifier in module_init(). Re-registering the notifier > each time a seperate device is probed seems very sketchy. Registering the notifier in the init is simpler. I will change it. > >> return 0; >> >> -- >> 2.21.0 p>> ^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH 0/3] Add bad pmem bad blocks to bad range @ 2019-08-20 2:30 Santosh Sivaraj 2019-08-20 2:30 ` [PATCH 2/3] of_pmem: Add memory ranges which took a mce " Santosh Sivaraj 0 siblings, 1 reply; 8+ messages in thread From: Santosh Sivaraj @ 2019-08-20 2:30 UTC (permalink / raw) To: linuxppc-dev; +Cc: Chandan Rajendra, Oliver O'Halloran, Mahesh Salgaonkar This series, which should be based on top of the still un-merged "powerpc: implement machine check safe memcpy" series, adds support to add the bad blocks which generated an MCE to the NVDIMM bad blocks. The next access of the same memory will be blocked by the NVDIMM layer itself. --- Santosh Sivaraj (3): powerpc/mce: Add MCE notification chain of_pmem: Add memory ranges which took a mce to bad range papr/scm: Add bad memory ranges to nvdimm bad ranges arch/powerpc/include/asm/mce.h | 3 + arch/powerpc/kernel/mce.c | 15 +++ arch/powerpc/platforms/pseries/papr_scm.c | 86 +++++++++++- drivers/nvdimm/of_pmem.c | 151 +++++++++++++++++++--- 4 files changed, 234 insertions(+), 21 deletions(-) -- 2.21.0 ^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH 2/3] of_pmem: Add memory ranges which took a mce to bad range 2019-08-20 2:30 [PATCH 0/3] Add bad pmem bad blocks to bad range Santosh Sivaraj @ 2019-08-20 2:30 ` Santosh Sivaraj 2019-08-20 8:41 ` Oliver O'Halloran 0 siblings, 1 reply; 8+ messages in thread From: Santosh Sivaraj @ 2019-08-20 2:30 UTC (permalink / raw) To: linuxppc-dev; +Cc: Chandan Rajendra, Oliver O'Halloran, Mahesh Salgaonkar Subscribe to the MCE notification and add the physical address which generated a memory error to nvdimm bad range. Signed-off-by: Santosh Sivaraj <santosh@fossix.org> --- drivers/nvdimm/of_pmem.c | 151 +++++++++++++++++++++++++++++++++------ 1 file changed, 131 insertions(+), 20 deletions(-) diff --git a/drivers/nvdimm/of_pmem.c b/drivers/nvdimm/of_pmem.c index a0c8dcfa0bf9..155e56862fdf 100644 --- a/drivers/nvdimm/of_pmem.c +++ b/drivers/nvdimm/of_pmem.c @@ -8,6 +8,9 @@ #include <linux/module.h> #include <linux/ioport.h> #include <linux/slab.h> +#include <linux/string.h> +#include <linux/nd.h> +#include <asm/mce.h> static const struct attribute_group *region_attr_groups[] = { &nd_region_attribute_group, @@ -25,11 +28,77 @@ struct of_pmem_private { struct nvdimm_bus *bus; }; +struct of_pmem_region { + struct of_pmem_private *priv; + struct nd_region_desc *region_desc; + struct nd_region *region; + struct list_head region_list; +}; + +LIST_HEAD(pmem_regions); +DEFINE_MUTEX(pmem_region_lock); + +static int handle_mce_ue(struct notifier_block *nb, unsigned long val, + void *data) +{ + struct machine_check_event *evt = data; + struct of_pmem_region *pmem_region; + u64 aligned_addr, phys_addr; + bool found = false; + + if (evt->error_type != MCE_ERROR_TYPE_UE) + return NOTIFY_DONE; + + if (list_empty(&pmem_regions)) + return NOTIFY_DONE; + + phys_addr = evt->u.ue_error.physical_address + + (evt->u.ue_error.effective_address & ~PAGE_MASK); + + if (!evt->u.ue_error.physical_address_provided || + !is_zone_device_page(pfn_to_page(phys_addr >> PAGE_SHIFT))) + return 
NOTIFY_DONE; + + mutex_lock(&pmem_region_lock); + list_for_each_entry(pmem_region, &pmem_regions, region_list) { + struct resource *res = pmem_region->region_desc->res; + + if (phys_addr >= res->start && phys_addr <= res->end) { + found = true; + break; + } + } + mutex_unlock(&pmem_region_lock); + + if (!found) + return NOTIFY_DONE; + + aligned_addr = ALIGN_DOWN(phys_addr, L1_CACHE_BYTES); + + if (nvdimm_bus_add_badrange(pmem_region->priv->bus, aligned_addr, + L1_CACHE_BYTES)) + return NOTIFY_DONE; + + pr_debug("Add memory range (0x%llx -- 0x%llx) as bad range\n", + aligned_addr, aligned_addr + L1_CACHE_BYTES); + + + nvdimm_region_notify(pmem_region->region, NVDIMM_REVALIDATE_POISON); + + return NOTIFY_OK; +} + +static struct notifier_block mce_ue_nb = { + .notifier_call = handle_mce_ue +}; + static int of_pmem_region_probe(struct platform_device *pdev) { struct of_pmem_private *priv; struct device_node *np; struct nvdimm_bus *bus; + struct of_pmem_region *pmem_region; + struct nd_region_desc *ndr_desc; bool is_volatile; int i; @@ -58,32 +127,49 @@ static int of_pmem_region_probe(struct platform_device *pdev) is_volatile ? "volatile" : "non-volatile", np); for (i = 0; i < pdev->num_resources; i++) { - struct nd_region_desc ndr_desc; struct nd_region *region; - /* - * NB: libnvdimm copies the data from ndr_desc into it's own - * structures so passing a stack pointer is fine. 
- */ - memset(&ndr_desc, 0, sizeof(ndr_desc)); - ndr_desc.attr_groups = region_attr_groups; - ndr_desc.numa_node = dev_to_node(&pdev->dev); - ndr_desc.target_node = ndr_desc.numa_node; - ndr_desc.res = &pdev->resource[i]; - ndr_desc.of_node = np; - set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags); + ndr_desc = kzalloc(sizeof(struct nd_region_desc), GFP_KERNEL); + if (!ndr_desc) { + nvdimm_bus_unregister(priv->bus); + kfree(priv); + return -ENOMEM; + } + + ndr_desc->attr_groups = region_attr_groups; + ndr_desc->numa_node = dev_to_node(&pdev->dev); + ndr_desc->target_node = ndr_desc->numa_node; + ndr_desc->res = &pdev->resource[i]; + ndr_desc->of_node = np; + set_bit(ND_REGION_PAGEMAP, &ndr_desc->flags); if (is_volatile) - region = nvdimm_volatile_region_create(bus, &ndr_desc); + region = nvdimm_volatile_region_create(bus, ndr_desc); else - region = nvdimm_pmem_region_create(bus, &ndr_desc); + region = nvdimm_pmem_region_create(bus, ndr_desc); - if (!region) + if (!region) { dev_warn(&pdev->dev, "Unable to register region %pR from %pOF\n", - ndr_desc.res, np); - else - dev_dbg(&pdev->dev, "Registered region %pR from %pOF\n", - ndr_desc.res, np); + ndr_desc->res, np); + continue; + } + + dev_dbg(&pdev->dev, "Registered region %pR from %pOF\n", + ndr_desc->res, np); + + pmem_region = kzalloc(sizeof(struct of_pmem_region), + GFP_KERNEL); + if (!pmem_region) + continue; + + pmem_region->region_desc = ndr_desc; + pmem_region->region = region; + pmem_region->priv = priv; + + /* Save regions registered for use by the notification code */ + mutex_lock(&pmem_region_lock); + list_add_tail(&pmem_region->region_list, &pmem_regions); + mutex_unlock(&pmem_region_lock); } return 0; @@ -92,6 +178,13 @@ static int of_pmem_region_probe(struct platform_device *pdev) static int of_pmem_region_remove(struct platform_device *pdev) { struct of_pmem_private *priv = platform_get_drvdata(pdev); + struct of_pmem_region *r, *t; + + list_for_each_entry_safe(r, t, &pmem_regions, region_list) { + 
list_del(&(r->region_list)); + kfree(r->region_desc); + kfree(r); + } nvdimm_bus_unregister(priv->bus); kfree(priv); @@ -113,7 +206,25 @@ static struct platform_driver of_pmem_region_driver = { }, }; -module_platform_driver(of_pmem_region_driver); +static int __init of_pmem_init(void) +{ + int ret; + + ret = platform_driver_register(&of_pmem_region_driver); + if (!ret) + mce_register_notifier(&mce_ue_nb); + + return ret; +} +module_init(of_pmem_init); + +static void __exit of_pmem_exit(void) +{ + mce_unregister_notifier(&mce_ue_nb); + platform_driver_unregister(&of_pmem_region_driver); +} +module_exit(of_pmem_exit); + MODULE_DEVICE_TABLE(of, of_pmem_region_match); MODULE_LICENSE("GPL"); MODULE_AUTHOR("IBM Corporation"); -- 2.21.0 ^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH 2/3] of_pmem: Add memory ranges which took a mce to bad range 2019-08-20 2:30 ` [PATCH 2/3] of_pmem: Add memory ranges which took a mce " Santosh Sivaraj @ 2019-08-20 8:41 ` Oliver O'Halloran 0 siblings, 0 replies; 8+ messages in thread From: Oliver O'Halloran @ 2019-08-20 8:41 UTC (permalink / raw) To: Santosh Sivaraj Cc: linux-nvdimm, Aneesh Kumar K.V, Mahesh Salgaonkar, Chandan Rajendra, Dan Williams, linuxppc-dev On Tue, Aug 20, 2019 at 12:30 PM Santosh Sivaraj <santosh@fossix.org> wrote: > > Subscribe to the MCE notification and add the physical address which > generated a memory error to nvdimm bad range. Uh... I should have looked a bit closer at this on v1. a) of_pmem.c isn't part of the powerpc tree. You should have CCed this to the nvdimm list since you'll need an Ack from Dan Williams. b) of_pmem isn't powerpc specific. Adding a pile of powerpc specific machine check parsing means it's not going to compile on other DT platforms. c) all this appears to be copied and pasted from the papr_scm version of this. Considering this is pretty similar in spirit to what's done on x86 today (drivers/acpi/nfit/mce.c) I think you would get more milage out of moving the address matching into libnvdimm itself. Machine check handling is always going to be arch specific, but memory errors could be re-emitted by the arch code into a more generic notifier chain that libnvdimm use. There's probably other uses in mm/ for such a chain too. 
Oliver > Signed-off-by: Santosh Sivaraj <santosh@fossix.org> > --- > drivers/nvdimm/of_pmem.c | 151 +++++++++++++++++++++++++++++++++------ > 1 file changed, 131 insertions(+), 20 deletions(-) > > diff --git a/drivers/nvdimm/of_pmem.c b/drivers/nvdimm/of_pmem.c > index a0c8dcfa0bf9..155e56862fdf 100644 > --- a/drivers/nvdimm/of_pmem.c > +++ b/drivers/nvdimm/of_pmem.c > @@ -8,6 +8,9 @@ > #include <linux/module.h> > #include <linux/ioport.h> > #include <linux/slab.h> > +#include <linux/string.h> > +#include <linux/nd.h> > +#include <asm/mce.h> > > static const struct attribute_group *region_attr_groups[] = { > &nd_region_attribute_group, > @@ -25,11 +28,77 @@ struct of_pmem_private { > struct nvdimm_bus *bus; > }; > > +struct of_pmem_region { > + struct of_pmem_private *priv; > + struct nd_region_desc *region_desc; > + struct nd_region *region; > + struct list_head region_list; > +}; > + > +LIST_HEAD(pmem_regions); > +DEFINE_MUTEX(pmem_region_lock); > + > +static int handle_mce_ue(struct notifier_block *nb, unsigned long val, > + void *data) > +{ > + struct machine_check_event *evt = data; > + struct of_pmem_region *pmem_region; > + u64 aligned_addr, phys_addr; > + bool found = false; > + > + if (evt->error_type != MCE_ERROR_TYPE_UE) > + return NOTIFY_DONE; > + > + if (list_empty(&pmem_regions)) > + return NOTIFY_DONE; > + > + phys_addr = evt->u.ue_error.physical_address + > + (evt->u.ue_error.effective_address & ~PAGE_MASK); > + > + if (!evt->u.ue_error.physical_address_provided || > + !is_zone_device_page(pfn_to_page(phys_addr >> PAGE_SHIFT))) > + return NOTIFY_DONE; > + > + mutex_lock(&pmem_region_lock); > + list_for_each_entry(pmem_region, &pmem_regions, region_list) { > + struct resource *res = pmem_region->region_desc->res; > + > + if (phys_addr >= res->start && phys_addr <= res->end) { > + found = true; > + break; > + } > + } > + mutex_unlock(&pmem_region_lock); > + > + if (!found) > + return NOTIFY_DONE; > + > + aligned_addr = ALIGN_DOWN(phys_addr, 
L1_CACHE_BYTES); > + > + if (nvdimm_bus_add_badrange(pmem_region->priv->bus, aligned_addr, > + L1_CACHE_BYTES)) > + return NOTIFY_DONE; > + > + pr_debug("Add memory range (0x%llx -- 0x%llx) as bad range\n", > + aligned_addr, aligned_addr + L1_CACHE_BYTES); > + > + > + nvdimm_region_notify(pmem_region->region, NVDIMM_REVALIDATE_POISON); > + > + return NOTIFY_OK; > +} > + > +static struct notifier_block mce_ue_nb = { > + .notifier_call = handle_mce_ue > +}; > + > static int of_pmem_region_probe(struct platform_device *pdev) > { > struct of_pmem_private *priv; > struct device_node *np; > struct nvdimm_bus *bus; > + struct of_pmem_region *pmem_region; > + struct nd_region_desc *ndr_desc; > bool is_volatile; > int i; > > @@ -58,32 +127,49 @@ static int of_pmem_region_probe(struct platform_device *pdev) > is_volatile ? "volatile" : "non-volatile", np); > > for (i = 0; i < pdev->num_resources; i++) { > - struct nd_region_desc ndr_desc; > struct nd_region *region; > > - /* > - * NB: libnvdimm copies the data from ndr_desc into it's own > - * structures so passing a stack pointer is fine. 
> - */ > - memset(&ndr_desc, 0, sizeof(ndr_desc)); > - ndr_desc.attr_groups = region_attr_groups; > - ndr_desc.numa_node = dev_to_node(&pdev->dev); > - ndr_desc.target_node = ndr_desc.numa_node; > - ndr_desc.res = &pdev->resource[i]; > - ndr_desc.of_node = np; > - set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags); > + ndr_desc = kzalloc(sizeof(struct nd_region_desc), GFP_KERNEL); > + if (!ndr_desc) { > + nvdimm_bus_unregister(priv->bus); > + kfree(priv); > + return -ENOMEM; > + } > + > + ndr_desc->attr_groups = region_attr_groups; > + ndr_desc->numa_node = dev_to_node(&pdev->dev); > + ndr_desc->target_node = ndr_desc->numa_node; > + ndr_desc->res = &pdev->resource[i]; > + ndr_desc->of_node = np; > + set_bit(ND_REGION_PAGEMAP, &ndr_desc->flags); > > if (is_volatile) > - region = nvdimm_volatile_region_create(bus, &ndr_desc); > + region = nvdimm_volatile_region_create(bus, ndr_desc); > else > - region = nvdimm_pmem_region_create(bus, &ndr_desc); > + region = nvdimm_pmem_region_create(bus, ndr_desc); > > - if (!region) > + if (!region) { > dev_warn(&pdev->dev, "Unable to register region %pR from %pOF\n", > - ndr_desc.res, np); > - else > - dev_dbg(&pdev->dev, "Registered region %pR from %pOF\n", > - ndr_desc.res, np); > + ndr_desc->res, np); > + continue; > + } > + > + dev_dbg(&pdev->dev, "Registered region %pR from %pOF\n", > + ndr_desc->res, np); > + > + pmem_region = kzalloc(sizeof(struct of_pmem_region), > + GFP_KERNEL); > + if (!pmem_region) > + continue; > + > + pmem_region->region_desc = ndr_desc; > + pmem_region->region = region; > + pmem_region->priv = priv; > + > + /* Save regions registered for use by the notification code */ > + mutex_lock(&pmem_region_lock); > + list_add_tail(&pmem_region->region_list, &pmem_regions); > + mutex_unlock(&pmem_region_lock); > } > > return 0; > @@ -92,6 +178,13 @@ static int of_pmem_region_probe(struct platform_device *pdev) > static int of_pmem_region_remove(struct platform_device *pdev) > { > struct of_pmem_private *priv = 
platform_get_drvdata(pdev); > + struct of_pmem_region *r, *t; > + > + list_for_each_entry_safe(r, t, &pmem_regions, region_list) { > + list_del(&(r->region_list)); > + kfree(r->region_desc); > + kfree(r); > + } > > nvdimm_bus_unregister(priv->bus); > kfree(priv); > @@ -113,7 +206,25 @@ static struct platform_driver of_pmem_region_driver = { > }, > }; > > -module_platform_driver(of_pmem_region_driver); > +static int __init of_pmem_init(void) > +{ > + int ret; > + > + ret = platform_driver_register(&of_pmem_region_driver); > + if (!ret) > + mce_register_notifier(&mce_ue_nb); > + > + return ret; > +} > +module_init(of_pmem_init); > + > +static void __exit of_pmem_exit(void) > +{ > + mce_unregister_notifier(&mce_ue_nb); > + platform_driver_unregister(&of_pmem_region_driver); > +} > +module_exit(of_pmem_exit); > + > MODULE_DEVICE_TABLE(of, of_pmem_region_match); > MODULE_LICENSE("GPL"); > MODULE_AUTHOR("IBM Corporation"); > -- > 2.21.0 > ^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2019-08-20 8:43 UTC | newest] Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2019-08-14 8:24 [PATCH 0/3] Add bad pmem bad blocks to bad range Santosh Sivaraj 2019-08-14 8:24 ` [PATCH 1/3] powerpc/mce: Add MCE notification chain Santosh Sivaraj 2019-08-14 8:24 ` [PATCH 2/3] of_pmem: Add memory ranges which took a mce to bad range Santosh Sivaraj 2019-08-14 8:24 ` [PATCH 3/3] papr/scm: Add bad memory ranges to nvdimm bad ranges Santosh Sivaraj 2019-08-15 13:11 ` Oliver O'Halloran 2019-08-15 14:00 ` Santosh Sivaraj 2019-08-20 2:30 [PATCH 0/3] Add bad pmem bad blocks to bad range Santosh Sivaraj 2019-08-20 2:30 ` [PATCH 2/3] of_pmem: Add memory ranges which took a mce " Santosh Sivaraj 2019-08-20 8:41 ` Oliver O'Halloran
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).