linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Robert Richter <rrichter@marvell.com>
To: Borislav Petkov <bp@alien8.de>, James Morse <james.morse@arm.com>,
	"Mauro Carvalho Chehab" <mchehab@kernel.org>
Cc: "linux-edac@vger.kernel.org" <linux-edac@vger.kernel.org>,
	"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
	Robert Richter <rrichter@marvell.com>
Subject: [PATCH v2 15/24] EDAC, ghes: Extract numa node information for each dimm
Date: Mon, 24 Jun 2019 15:09:25 +0000	[thread overview]
Message-ID: <20190624150758.6695-16-rrichter@marvell.com> (raw)
In-Reply-To: <20190624150758.6695-1-rrichter@marvell.com>

In a later patch we want to have one mc device per node. This patch
extracts the numa node information for each dimm. This is done by
collecting the physical address ranges from the DMI table (Memory
Array Mapped Address - Type 19 of SMBIOS spec). The node information
for a physical address is already known to a NUMA-aware system (e.g. by
using the ACPI _PXM method or the ACPI SRAT table), so based on the PA
we can assign the node id to the dimms.

A fallback that disables numa is implemented in case the node
information is inconsistent.

E.g., on a ThunderX2 system the following node mappings are found
based on the DMI table:

EDAC DEBUG: mem_info_setup: DIMM0: Found mem range [0x0000008800000000-0x0000009ffcffffff] on node 0
EDAC DEBUG: mem_info_setup: DIMM1: Found mem range [0x0000008800000000-0x0000009ffcffffff] on node 0
EDAC DEBUG: mem_info_setup: DIMM2: Found mem range [0x0000008800000000-0x0000009ffcffffff] on node 0
EDAC DEBUG: mem_info_setup: DIMM3: Found mem range [0x0000008800000000-0x0000009ffcffffff] on node 0
EDAC DEBUG: mem_info_setup: DIMM4: Found mem range [0x0000008800000000-0x0000009ffcffffff] on node 0
EDAC DEBUG: mem_info_setup: DIMM5: Found mem range [0x0000008800000000-0x0000009ffcffffff] on node 0
EDAC DEBUG: mem_info_setup: DIMM6: Found mem range [0x0000008800000000-0x0000009ffcffffff] on node 0
EDAC DEBUG: mem_info_setup: DIMM7: Found mem range [0x0000008800000000-0x0000009ffcffffff] on node 0
EDAC DEBUG: mem_info_setup: DIMM8: Found mem range [0x0000009ffd000000-0x000000bffcffffff] on node 1
EDAC DEBUG: mem_info_setup: DIMM9: Found mem range [0x0000009ffd000000-0x000000bffcffffff] on node 1
EDAC DEBUG: mem_info_setup: DIMM10: Found mem range [0x0000009ffd000000-0x000000bffcffffff] on node 1
EDAC DEBUG: mem_info_setup: DIMM11: Found mem range [0x0000009ffd000000-0x000000bffcffffff] on node 1
EDAC DEBUG: mem_info_setup: DIMM12: Found mem range [0x0000009ffd000000-0x000000bffcffffff] on node 1
EDAC DEBUG: mem_info_setup: DIMM13: Found mem range [0x0000009ffd000000-0x000000bffcffffff] on node 1
EDAC DEBUG: mem_info_setup: DIMM14: Found mem range [0x0000009ffd000000-0x000000bffcffffff] on node 1
EDAC DEBUG: mem_info_setup: DIMM15: Found mem range [0x0000009ffd000000-0x000000bffcffffff] on node 1

Signed-off-by: Robert Richter <rrichter@marvell.com>
---
 drivers/edac/ghes_edac.c | 98 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 97 insertions(+), 1 deletion(-)

diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c
index 44bfb499b147..793362bea044 100644
--- a/drivers/edac/ghes_edac.c
+++ b/drivers/edac/ghes_edac.c
@@ -65,14 +65,32 @@ struct memdev_dmi_entry {
 	u16 conf_mem_clk_speed;
 } __attribute__((__packed__));
 
+/* Memory Array Mapped Address - Type 19 of SMBIOS spec */
+struct memarr_dmi_entry {
+	u8		type;
+	u8		length;
+	u16		handle;
+	u32		start;
+	u32		end;
+	u16		phys_mem_array_handle;
+	u8		partition_width;
+	u64		ext_start;
+	u64		ext_end;
+} __attribute__((__packed__));
+
 struct ghes_dimm_info {
 	struct dimm_info dimm_info;
 	int		idx;
+	int		numa_node;
+	phys_addr_t	start;
+	phys_addr_t	end;
+	u16		phys_handle;
 };
 
 struct ghes_mem_info {
-	int num_dimm;
+	int		num_dimm;
 	struct ghes_dimm_info *dimms;
+	int		dimms_per_node[MAX_NUMNODES];
 };
 
 static struct ghes_mem_info mem_info;
@@ -108,12 +126,52 @@ static int ghes_dimm_info_init(int num)
 
 	for_each_dimm(dimm) {
 		dimm->idx	= idx;
+		dimm->numa_node	= NUMA_NO_NODE;
 		idx++;
 	}
 
 	return 0;
 }
 
+static void ghes_edac_set_nid(const struct dmi_header *dh, void *arg)
+{
+	struct memarr_dmi_entry *entry = (struct memarr_dmi_entry *)dh;
+	struct ghes_dimm_info *dimm;
+	phys_addr_t start, end;
+	int nid;
+
+	if (dh->type != DMI_ENTRY_MEM_ARRAY_MAPPED_ADDR)
+		return;
+
+	/* only support SMBIOS 2.7+ */
+	if (entry->length < sizeof(*entry))
+		return;
+
+	if (entry->start == 0xffffffff)
+		start = entry->ext_start;
+	else
+		start = entry->start;
+	if (entry->end == 0xffffffff)
+		end = entry->ext_end;
+	else
+		end = entry->end;
+
+	if (!pfn_valid(PHYS_PFN(start)))
+		return;
+
+	nid = pfn_to_nid(PHYS_PFN(start));
+	if (nid < 0 || nid >= MAX_NUMNODES || !node_possible(nid))
+		nid = NUMA_NO_NODE;
+
+	for_each_dimm(dimm) {
+		if (entry->phys_mem_array_handle == dimm->phys_handle) {
+			dimm->numa_node	= nid;
+			dimm->start	= start;
+			dimm->end	= end;
+		}
+	}
+}
+
 static int get_dimm_smbios_index(u16 handle)
 {
 	struct mem_ctl_info *mci = ghes_pvt->mci;
@@ -135,6 +193,8 @@ static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg)
 		struct dimm_info *dimm = &mi->dimm_info;
 		u16 rdr_mask = BIT(7) | BIT(13);
 
+		mi->phys_handle = entry->phys_mem_array_handle;
+
 		if (entry->size == 0xffff) {
 			pr_info("Can't get DIMM%i size\n", mi->idx);
 			dimm->nr_pages = MiB_TO_PAGES(32);/* Unknown */
@@ -224,8 +284,23 @@ static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg)
 	}
 }
 
+static void mem_info_disable_numa(void)
+{
+	struct ghes_dimm_info *dimm;
+
+	for_each_dimm(dimm) {
+		if (dimm->numa_node != NUMA_NO_NODE)
+			mem_info.dimms_per_node[dimm->numa_node] = 0;
+		dimm->numa_node = 0;
+	}
+
+	mem_info.dimms_per_node[0] = mem_info.num_dimm;
+}
+
 static int mem_info_setup(void)
 {
+	struct ghes_dimm_info *dimm;
+	bool enable_numa = true;
 	int num = 0;
 	int idx = 0;
 	int ret;
@@ -238,6 +313,25 @@ static int mem_info_setup(void)
 		return ret;
 
 	dmi_walk(ghes_edac_dmidecode, &idx);
+	dmi_walk(ghes_edac_set_nid, NULL);
+
+	for_each_dimm(dimm) {
+		if (dimm->numa_node == NUMA_NO_NODE)
+			enable_numa = false;
+		else
+			mem_info.dimms_per_node[dimm->numa_node]++;
+
+		edac_dbg(1, "DIMM%i: Found mem range [%pa-%pa] on node %d\n",
+			dimm->idx, &dimm->start, &dimm->end, dimm->numa_node);
+	}
+
+	if (enable_numa)
+		return 0;
+
+	/* something went wrong, disable numa */
+	if (num_possible_nodes() > 1)
+		pr_warn("Can't get numa info, disabling numa\n");
+	mem_info_disable_numa();
 
 	return 0;
 }
@@ -258,6 +352,8 @@ static int mem_info_setup_fake(void)
 	dimm->dtype = DEV_UNKNOWN;
 	dimm->edac_mode = EDAC_SECDED;
 
+	mem_info_disable_numa();
+
 	return 0;
 }
 
-- 
2.20.1


  parent reply	other threads:[~2019-06-24 15:09 UTC|newest]

Thread overview: 52+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-06-24 15:08 [PATCH v2 00/24] EDAC, mc, ghes: Fixes and updates to improve memory error reporting Robert Richter
2019-06-24 15:08 ` [PATCH v2 01/24] EDAC, mc: Fix grain_bits calculation Robert Richter
2019-08-03 10:08   ` Borislav Petkov
2019-06-24 15:08 ` [PATCH v2 02/24] EDAC, ghes: Fix grain calculation Robert Richter
2019-08-09 13:15   ` Borislav Petkov
2019-08-12  6:42     ` Robert Richter
2019-08-12  7:32       ` Borislav Petkov
2019-08-12 12:05         ` Robert Richter
2019-08-12 12:38           ` Borislav Petkov
2019-06-24 15:08 ` [PATCH v2 03/24] EDAC, ghes: Remove pvt->detail_location string Robert Richter
2019-08-02 17:04   ` James Morse
2019-08-07  9:00     ` Robert Richter
2019-08-13  8:09   ` Borislav Petkov
2019-06-24 15:09 ` [PATCH v2 04/24] EDAC, ghes: Unify trace_mc_event() code with edac_mc driver Robert Richter
2019-06-24 15:09 ` [PATCH v2 05/24] EDAC, mc: Fix and improve sysfs init functions Robert Richter
2019-08-13  8:26   ` Borislav Petkov
2019-06-24 15:09 ` [PATCH v2 06/24] EDAC: Kill EDAC_DIMM_PTR() macro Robert Richter
2019-08-13 14:59   ` Borislav Petkov
2019-08-27 12:20     ` Robert Richter
2019-06-24 15:09 ` [PATCH v2 07/24] EDAC: Kill EDAC_DIMM_OFF() macro Robert Richter
2019-08-14 14:52   ` Borislav Petkov
2019-06-24 15:09 ` [PATCH v2 08/24] EDAC: Introduce mci_for_each_dimm() iterator Robert Richter
2019-08-14 15:18   ` Borislav Petkov
2019-08-28  8:18     ` Robert Richter
2019-06-24 15:09 ` [PATCH v2 09/24] EDAC, mc: Cleanup _edac_mc_free() code Robert Richter
2019-08-14 16:31   ` Borislav Petkov
2019-06-24 15:09 ` [PATCH v2 10/24] EDAC, mc: Remove per layer counters Robert Richter
2019-08-16  9:24   ` Borislav Petkov
2019-06-24 15:09 ` [PATCH v2 11/24] EDAC, mc: Rework edac_raw_mc_handle_error() to use struct dimm_info Robert Richter
2019-06-24 15:09 ` [PATCH v2 12/24] EDAC, ghes: Use standard kernel macros for page calculations Robert Richter
2019-08-02 17:04   ` James Morse
2019-08-07  9:52     ` Robert Richter
2019-06-24 15:09 ` [PATCH v2 13/24] EDAC, ghes: Add support for legacy API counters Robert Richter
2019-08-16  9:55   ` Borislav Petkov
2019-08-30  9:35     ` Robert Richter
2019-06-24 15:09 ` [PATCH v2 14/24] EDAC, ghes: Rework memory hierarchy detection Robert Richter
2019-08-20  8:56   ` Borislav Petkov
2019-06-24 15:09 ` Robert Richter [this message]
2019-08-02 17:05   ` [PATCH v2 15/24] EDAC, ghes: Extract numa node information for each dimm James Morse
2019-08-09 13:09     ` Robert Richter
2019-06-24 15:09 ` [PATCH v2 16/24] EDAC, ghes: Moving code around ghes_edac_register() Robert Richter
2019-06-24 15:09 ` [PATCH v2 17/24] EDAC, ghes: Create one memory controller device per node Robert Richter
2019-06-24 15:09 ` [PATCH v2 18/24] EDAC, ghes: Fill sysfs with the DMI DIMM label information Robert Richter
2019-06-24 15:09 ` [PATCH v2 19/24] EDAC, mc: Introduce edac_mc_alloc_by_dimm() for per dimm allocation Robert Richter
2019-06-24 15:09 ` [PATCH v2 20/24] EDAC, ghes: Identify dimm by node, card, module and handle Robert Richter
2019-06-24 15:09 ` [PATCH v2 21/24] EDAC, ghes: Enable per-layer reporting based on card/module Robert Richter
2019-06-24 15:09 ` [PATCH v2 22/24] EDAC, ghes: Move struct member smbios_handle to struct ghes_dimm_info Robert Richter
2019-06-24 15:09 ` [PATCH v2 23/24] EDAC, Documentation: Describe CPER module definition and DIMM ranks Robert Richter
2019-06-24 15:09 ` [PATCH v2 24/24] EDAC, ghes: Disable legacy API for ARM64 Robert Richter
2019-06-26  9:33   ` James Morse
2019-06-26 10:11     ` Robert Richter
2019-08-02  7:58 ` [PATCH v2 00/24] EDAC, mc, ghes: Fixes and updates to improve memory error reporting Robert Richter

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190624150758.6695-16-rrichter@marvell.com \
    --to=rrichter@marvell.com \
    --cc=bp@alien8.de \
    --cc=james.morse@arm.com \
    --cc=linux-edac@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mchehab@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).