On Mon, Jun 14, 2021 at 10:10:03PM +0530, Aneesh Kumar K.V wrote: > FORM2 introduces a concept of secondary domain which is identical to the > concept of FORM1 primary domain. Use secondary domain as the numa node > when using persistent memory device. For DAX kmem use the logical domain > id introduced in FORM2. This new numa node > > Signed-off-by: Aneesh Kumar K.V > --- > arch/powerpc/mm/numa.c | 28 +++++++++++++++++++++++ > arch/powerpc/platforms/pseries/papr_scm.c | 26 +++++++++++++-------- > arch/powerpc/platforms/pseries/pseries.h | 1 + > 3 files changed, 45 insertions(+), 10 deletions(-) > > diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c > index 86cd2af014f7..b9ac6d02e944 100644 > --- a/arch/powerpc/mm/numa.c > +++ b/arch/powerpc/mm/numa.c > @@ -265,6 +265,34 @@ static int associativity_to_nid(const __be32 *associativity) > return nid; > } > > +int get_primary_and_secondary_domain(struct device_node *node, int *primary, int *secondary) > +{ > + int secondary_index; > + const __be32 *associativity; > + > + if (!numa_enabled) { > + *primary = NUMA_NO_NODE; > + *secondary = NUMA_NO_NODE; > + return 0; > + } > + > + associativity = of_get_associativity(node); > + if (!associativity) > + return -ENODEV; > + > + if (of_read_number(associativity, 1) >= primary_domain_index) { > + *primary = of_read_number(&associativity[primary_domain_index], 1); > + secondary_index = of_read_number(&distance_ref_points[1], 1); Secondary ID is always the second reference point, but primary depends on the length of resources? That seems very weird. > + *secondary = of_read_number(&associativity[secondary_index], 1); > + } > + if (*primary == 0xffff || *primary >= nr_node_ids) > + *primary = NUMA_NO_NODE; > + > + if (*secondary == 0xffff || *secondary >= nr_node_ids) > + *secondary = NUMA_NO_NODE; > + return 0; > +} > + > /* Returns the nid associated with the given device tree node, > * or -1 if not found. 
> */ > diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c > index ef26fe40efb0..9bf2f1f3ddc5 100644 > --- a/arch/powerpc/platforms/pseries/papr_scm.c > +++ b/arch/powerpc/platforms/pseries/papr_scm.c > @@ -18,6 +18,7 @@ > #include > #include > #include > +#include "pseries.h" > > #define BIND_ANY_ADDR (~0ul) > > @@ -88,6 +89,8 @@ struct papr_scm_perf_stats { > struct papr_scm_priv { > struct platform_device *pdev; > struct device_node *dn; > + int numa_node; > + int target_node; > uint32_t drc_index; > uint64_t blocks; > uint64_t block_size; > @@ -923,7 +926,6 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p) > struct nd_mapping_desc mapping; > struct nd_region_desc ndr_desc; > unsigned long dimm_flags; > - int target_nid, online_nid; > ssize_t stat_size; > > p->bus_desc.ndctl = papr_scm_ndctl; > @@ -974,10 +976,8 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p) > mapping.size = p->blocks * p->block_size; // XXX: potential overflow? 
> > memset(&ndr_desc, 0, sizeof(ndr_desc)); > - target_nid = dev_to_node(&p->pdev->dev); > - online_nid = numa_map_to_online_node(target_nid); > - ndr_desc.numa_node = online_nid; > - ndr_desc.target_node = target_nid; > + ndr_desc.numa_node = p->numa_node; > + ndr_desc.target_node = p->target_node; > ndr_desc.res = &p->res; > ndr_desc.of_node = p->dn; > ndr_desc.provider_data = p; > @@ -1001,9 +1001,6 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p) > ndr_desc.res, p->dn); > goto err; > } > - if (target_nid != online_nid) > - dev_info(dev, "Region registered with target node %d and online node %d", > - target_nid, online_nid); > > mutex_lock(&papr_ndr_lock); > list_add_tail(&p->region_list, &papr_nd_regions); > @@ -1096,7 +1093,7 @@ static int papr_scm_probe(struct platform_device *pdev) > struct papr_scm_priv *p; > const char *uuid_str; > u64 uuid[2]; > - int rc; > + int rc, numa_node; > > /* check we have all the required DT properties */ > if (of_property_read_u32(dn, "ibm,my-drc-index", &drc_index)) { > @@ -1119,11 +1116,20 @@ static int papr_scm_probe(struct platform_device *pdev) > return -ENODEV; > } > > - > p = kzalloc(sizeof(*p), GFP_KERNEL); > if (!p) > return -ENOMEM; > > + if (get_primary_and_secondary_domain(dn, &p->target_node, &numa_node)) { > + dev_err(&pdev->dev, "%pOF: missing NUMA attributes!\n", dn); > + rc = -ENODEV; > + goto err; > + } > + p->numa_node = numa_map_to_online_node(numa_node); > + if (numa_node != p->numa_node) > + dev_info(&pdev->dev, "Region registered with online node %d and device tree node %d", > + p->numa_node, numa_node); > + > /* Initialize the dimm mutex */ > mutex_init(&p->health_mutex); > > diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h > index 663a0859cf13..9c2a1fc9ded1 100644 > --- a/arch/powerpc/platforms/pseries/pseries.h > +++ b/arch/powerpc/platforms/pseries/pseries.h > @@ -114,4 +114,5 @@ void pseries_setup_security_mitigations(void); > void 
pseries_lpar_read_hblkrm_characteristics(void); > > void update_numa_distance(struct device_node *node); > +int get_primary_and_secondary_domain(struct device_node *node, int *primary, int *secondary); > #endif /* _PSERIES_PSERIES_H */ -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson