Linux-EDAC Archive on lore.kernel.org
 help / color / Atom feed
From: Robert Richter <rrichter@marvell.com>
To: Borislav Petkov <bp@alien8.de>,
	Mauro Carvalho Chehab <mchehab@kernel.org>,
	Tony Luck <tony.luck@intel.com>
Cc: James Morse <james.morse@arm.com>,
	Robert Richter <rrichter@marvell.com>,
	"linux-edac@vger.kernel.org" <linux-edac@vger.kernel.org>,
	"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>
Subject: [PATCH v2 14/20] EDAC, mc: Remove per layer counters
Date: Wed, 6 Nov 2019 09:33:32 +0000
Message-ID: <20191106093239.25517-15-rrichter@marvell.com> (raw)
In-Reply-To: <20191106093239.25517-1-rrichter@marvell.com>

Looking at how mci->{ue,ce}_per_layer[EDAC_MAX_LAYERS] is used, it
turns out that only the leaves in the memory hierarchy are consumed
(in sysfs), but not the intermediate layers, e.g.:

 count = dimm->mci->ce_per_layer[dimm->mci->n_layers-1][dimm->idx];

These unused counters only add complexity, remove them. The error
counter values are directly stored in struct dimm_info now.

Signed-off-by: Robert Richter <rrichter@marvell.com>
---
 drivers/edac/edac_mc.c       | 106 ++++++++++++-----------------------
 drivers/edac/edac_mc_sysfs.c |  20 +++----
 drivers/edac/ghes_edac.c     |   5 +-
 include/linux/edac.h         |   7 +--
 4 files changed, 47 insertions(+), 91 deletions(-)

diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index b6032f51338e..dfc17c565d8f 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -315,12 +315,11 @@ struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num,
 	struct csrow_info *csr;
 	struct rank_info *chan;
 	struct dimm_info *dimm;
-	u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
 	unsigned int pos[EDAC_MAX_LAYERS];
-	unsigned int idx, size, tot_dimms = 1, count = 1;
-	unsigned int tot_csrows = 1, tot_channels = 1, tot_errcount = 0;
+	unsigned int idx, size, tot_dimms = 1;
+	unsigned int tot_csrows = 1, tot_channels = 1;
 	void *pvt, *p, *ptr = NULL;
-	int i, j, row, chn, n, len;
+	int j, row, chn, n, len;
 	bool per_rank = false;
 
 	if (WARN_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0))
@@ -346,19 +345,10 @@ struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num,
 	 * stringent as what the compiler would provide if we could simply
 	 * hardcode everything into a single struct.
 	 */
-	mci = edac_align_ptr(&ptr, sizeof(*mci), 1);
-	layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers);
-	for (i = 0; i < n_layers; i++) {
-		count *= layers[i].size;
-		edac_dbg(4, "errcount layer %d size %d\n", i, count);
-		ce_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
-		ue_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
-		tot_errcount += 2 * count;
-	}
-
-	edac_dbg(4, "allocating %d error counters\n", tot_errcount);
-	pvt = edac_align_ptr(&ptr, sz_pvt, 1);
-	size = ((unsigned long)pvt) + sz_pvt;
+	mci	= edac_align_ptr(&ptr, sizeof(*mci), 1);
+	layer	= edac_align_ptr(&ptr, sizeof(*layer), n_layers);
+	pvt	= edac_align_ptr(&ptr, sz_pvt, 1);
+	size	= ((unsigned long)pvt) + sz_pvt;
 
 	edac_dbg(1, "allocating %u bytes for mci data (%d %s, %d csrows/channels)\n",
 		 size,
@@ -374,10 +364,6 @@ struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num,
 	 * rather than an imaginary chunk of memory located at address 0.
 	 */
 	layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer));
-	for (i = 0; i < n_layers; i++) {
-		mci->ce_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ce_per_layer[i]));
-		mci->ue_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ue_per_layer[i]));
-	}
 	pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;
 
 	/* setup index and various internal pointers */
@@ -908,53 +894,31 @@ const char *edac_layer_name[] = {
 EXPORT_SYMBOL_GPL(edac_layer_name);
 
 static void edac_inc_ce_error(struct mem_ctl_info *mci,
-			      bool enable_per_layer_report,
 			      const int pos[EDAC_MAX_LAYERS],
 			      const u16 count)
 {
-	int i, index = 0;
+	struct dimm_info *dimm = edac_get_dimm(mci, pos[0], pos[1], pos[2]);
 
 	mci->ce_mc += count;
 
-	if (!enable_per_layer_report) {
+	if (dimm)
+		dimm->ce_count += count;
+	else
 		mci->ce_noinfo_count += count;
-		return;
-	}
-
-	for (i = 0; i < mci->n_layers; i++) {
-		if (pos[i] < 0)
-			break;
-		index += pos[i];
-		mci->ce_per_layer[i][index] += count;
-
-		if (i < mci->n_layers - 1)
-			index *= mci->layers[i + 1].size;
-	}
 }
 
 static void edac_inc_ue_error(struct mem_ctl_info *mci,
-				    bool enable_per_layer_report,
 				    const int pos[EDAC_MAX_LAYERS],
 				    const u16 count)
 {
-	int i, index = 0;
+	struct dimm_info *dimm = edac_get_dimm(mci, pos[0], pos[1], pos[2]);
 
 	mci->ue_mc += count;
 
-	if (!enable_per_layer_report) {
+	if (dimm)
+		dimm->ue_count += count;
+	else
 		mci->ue_noinfo_count += count;
-		return;
-	}
-
-	for (i = 0; i < mci->n_layers; i++) {
-		if (pos[i] < 0)
-			break;
-		index += pos[i];
-		mci->ue_per_layer[i][index] += count;
-
-		if (i < mci->n_layers - 1)
-			index *= mci->layers[i + 1].size;
-	}
 }
 
 static void edac_ce_error(struct mem_ctl_info *mci,
@@ -965,7 +929,6 @@ static void edac_ce_error(struct mem_ctl_info *mci,
 			  const char *label,
 			  const char *detail,
 			  const char *other_detail,
-			  const bool enable_per_layer_report,
 			  const unsigned long page_frame_number,
 			  const unsigned long offset_in_page,
 			  long grain)
@@ -988,7 +951,7 @@ static void edac_ce_error(struct mem_ctl_info *mci,
 				       error_count, msg, msg_aux, label,
 				       location, detail);
 	}
-	edac_inc_ce_error(mci, enable_per_layer_report, pos, error_count);
+	edac_inc_ce_error(mci, pos, error_count);
 
 	if (mci->scrub_mode == SCRUB_SW_SRC) {
 		/*
@@ -1018,8 +981,7 @@ static void edac_ue_error(struct mem_ctl_info *mci,
 			  const char *location,
 			  const char *label,
 			  const char *detail,
-			  const char *other_detail,
-			  const bool enable_per_layer_report)
+			  const char *other_detail)
 {
 	char *msg_aux = "";
 
@@ -1048,7 +1010,7 @@ static void edac_ue_error(struct mem_ctl_info *mci,
 			      msg, msg_aux, label, location, detail);
 	}
 
-	edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count);
+	edac_inc_ue_error(mci, pos, error_count);
 }
 
 void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type,
@@ -1079,16 +1041,16 @@ void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type,
 			"page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx",
 			e->page_frame_number, e->offset_in_page,
 			e->grain, e->syndrome);
-		edac_ce_error(mci, e->error_count, pos, e->msg, e->location, e->label,
-			      detail, e->other_detail, e->enable_per_layer_report,
+		edac_ce_error(mci, e->error_count, pos, e->msg, e->location,
+			      e->label, detail, e->other_detail,
 			      e->page_frame_number, e->offset_in_page, e->grain);
 	} else {
 		snprintf(detail, sizeof(detail),
 			"page:0x%lx offset:0x%lx grain:%ld",
 			e->page_frame_number, e->offset_in_page, e->grain);
 
-		edac_ue_error(mci, e->error_count, pos, e->msg, e->location, e->label,
-			      detail, e->other_detail, e->enable_per_layer_report);
+		edac_ue_error(mci, e->error_count, pos, e->msg, e->location,
+			      e->label, detail, e->other_detail);
 	}
 
 
@@ -1113,6 +1075,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 	int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer };
 	int i, n_labels = 0;
 	struct edac_raw_error_desc *e = &mci->error_desc;
+	bool any_memory = true;
 
 	edac_dbg(3, "MC%d\n", mci->mc_idx);
 
@@ -1130,9 +1093,9 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 
 	/*
 	 * Check if the event report is consistent and if the memory
-	 * location is known. If it is known, enable_per_layer_report will be
-	 * true, the DIMM(s) label info will be filled and the per-layer
-	 * error counters will be incremented.
+	 * location is known. If it is known, the DIMM(s) label info
+	 * will be filled and the DIMM's error counters will be
+	 * incremented.
 	 */
 	for (i = 0; i < mci->n_layers; i++) {
 		if (pos[i] >= (int)mci->layers[i].size) {
@@ -1150,7 +1113,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 			pos[i] = -1;
 		}
 		if (pos[i] >= 0)
-			e->enable_per_layer_report = true;
+			any_memory = false;
 	}
 
 	/*
@@ -1180,16 +1143,17 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 			e->grain = dimm->grain;
 
 		/*
-		 * If the error is memory-controller wide, there's no need to
-		 * seek for the affected DIMMs because the whole
-		 * channel/memory controller/...  may be affected.
-		 * Also, don't show errors for empty DIMM slots.
+		 * If the error is memory-controller wide, there's no
+		 * need to seek for the affected DIMMs because the
+		 * whole channel/memory controller/... may be
+		 * affected. Also, don't show errors for empty DIMM
+		 * slots.
 		 */
-		if (!e->enable_per_layer_report || !dimm->nr_pages)
+		if (any_memory || !dimm->nr_pages)
 			continue;
 
 		if (n_labels >= EDAC_MAX_LABELS) {
-			e->enable_per_layer_report = false;
+			any_memory = true;
 			break;
 		}
 		n_labels++;
@@ -1218,7 +1182,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 			chan = -2;
 	}
 
-	if (!e->enable_per_layer_report) {
+	if (any_memory) {
 		strcpy(e->label, "any memory");
 	} else {
 		edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan);
diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c
index 0367554e7437..8682df2f7f4f 100644
--- a/drivers/edac/edac_mc_sysfs.c
+++ b/drivers/edac/edac_mc_sysfs.c
@@ -556,10 +556,8 @@ static ssize_t dimmdev_ce_count_show(struct device *dev,
 				      char *data)
 {
 	struct dimm_info *dimm = to_dimm(dev);
-	u32 count;
 
-	count = dimm->mci->ce_per_layer[dimm->mci->n_layers-1][dimm->idx];
-	return sprintf(data, "%u\n", count);
+	return sprintf(data, "%u\n", dimm->ce_count);
 }
 
 static ssize_t dimmdev_ue_count_show(struct device *dev,
@@ -567,10 +565,8 @@ static ssize_t dimmdev_ue_count_show(struct device *dev,
 				      char *data)
 {
 	struct dimm_info *dimm = to_dimm(dev);
-	u32 count;
 
-	count = dimm->mci->ue_per_layer[dimm->mci->n_layers-1][dimm->idx];
-	return sprintf(data, "%u\n", count);
+	return sprintf(data, "%u\n", dimm->ue_count);
 }
 
 /* dimm/rank attribute files */
@@ -666,7 +662,9 @@ static ssize_t mci_reset_counters_store(struct device *dev,
 					const char *data, size_t count)
 {
 	struct mem_ctl_info *mci = to_mci(dev);
-	int cnt, row, chan, i;
+	struct dimm_info *dimm;
+	int row, chan;
+
 	mci->ue_mc = 0;
 	mci->ce_mc = 0;
 	mci->ue_noinfo_count = 0;
@@ -682,11 +680,9 @@ static ssize_t mci_reset_counters_store(struct device *dev,
 			ri->channels[chan]->ce_count = 0;
 	}
 
-	cnt = 1;
-	for (i = 0; i < mci->n_layers; i++) {
-		cnt *= mci->layers[i].size;
-		memset(mci->ce_per_layer[i], 0, cnt * sizeof(u32));
-		memset(mci->ue_per_layer[i], 0, cnt * sizeof(u32));
+	mci_for_each_dimm(mci, dimm) {
+		dimm->ue_count = 0;
+		dimm->ce_count = 0;
 	}
 
 	mci->start_time = jiffies;
diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c
index 725b9c58c028..74017da1f72c 100644
--- a/drivers/edac/ghes_edac.c
+++ b/drivers/edac/ghes_edac.c
@@ -356,11 +356,8 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
 				     mem_err->mem_dev_handle);
 
 		index = get_dimm_smbios_index(mci, mem_err->mem_dev_handle);
-		if (index >= 0) {
+		if (index >= 0)
 			e->top_layer = index;
-			e->enable_per_layer_report = true;
-		}
-
 	}
 	if (p > e->location)
 		*(p - 1) = '\0';
diff --git a/include/linux/edac.h b/include/linux/edac.h
index 67be279abd11..4d9673954856 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -383,6 +383,9 @@ struct dimm_info {
 	unsigned int csrow, cschannel;	/* Points to the old API data */
 
 	u16 smbios_handle;              /* Handle for SMBIOS type 17 */
+
+	u32 ce_count;
+	u32 ue_count;
 };
 
 /**
@@ -453,8 +456,6 @@ struct errcount_attribute_data {
  * @location:			location of the error
  * @label:			label of the affected DIMM(s)
  * @other_detail:		other driver-specific detail about the error
- * @enable_per_layer_report:	if false, the error affects all layers
- *				(typically, a memory controller error)
  */
 struct edac_raw_error_desc {
 	char location[LOCATION_SIZE];
@@ -470,7 +471,6 @@ struct edac_raw_error_desc {
 	unsigned long syndrome;
 	const char *msg;
 	const char *other_detail;
-	bool enable_per_layer_report;
 };
 
 /* MEMORY controller information structure
@@ -560,7 +560,6 @@ struct mem_ctl_info {
 	 */
 	u32 ce_noinfo_count, ue_noinfo_count;
 	u32 ue_mc, ce_mc;
-	u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
 
 	struct completion complete;
 
-- 
2.20.1


  parent reply index

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-11-06  9:32 [PATCH v2 00/20] EDAC: Rework edac_mc and ghes drivers Robert Richter
2019-11-06  9:33 ` [PATCH v2 01/20] EDAC: Replace EDAC_DIMM_PTR() macro with edac_get_dimm() function Robert Richter
2019-11-09  7:23   ` Mauro Carvalho Chehab
2019-11-06  9:33 ` [PATCH v2 02/20] EDAC: Remove EDAC_DIMM_OFF() macro Robert Richter
2019-11-09  7:27   ` Mauro Carvalho Chehab
2019-11-06  9:33 ` [PATCH v2 03/20] EDAC: Introduce mci_for_each_dimm() iterator Robert Richter
2019-11-10 11:37   ` Borislav Petkov
2019-11-06  9:33 ` [PATCH v2 04/20] EDAC, mc: Do not BUG_ON() in edac_mc_alloc() Robert Richter
2019-11-06  9:33 ` [PATCH v2 05/20] EDAC, mc: Remove needless zero string termination Robert Richter
2019-11-09  7:28   ` Mauro Carvalho Chehab
2019-11-09 10:11   ` Borislav Petkov
2019-11-06  9:33 ` [PATCH v2 06/20] EDAC, mc: Reduce indentation level in edac_mc_handle_error() Robert Richter
2019-11-06  9:33 ` [PATCH v2 07/20] EDAC, mc: Rename iterator variable to idx Robert Richter
2019-11-09 10:21   ` Borislav Petkov
2019-11-06  9:33 ` [PATCH v2 08/20] EDAC: Remove misleading comment in struct edac_raw_error_desc Robert Richter
2019-11-06  9:33 ` [PATCH v2 09/20] EDAC, ghes: Use standard kernel macros for page calculations Robert Richter
2019-11-06  9:33 ` [PATCH v2 10/20] EDAC, ghes: Fix grain calculation Robert Richter
2019-11-06  9:33 ` [PATCH v2 11/20] EDAC, ghes: Remove intermediate buffer pvt->detail_location Robert Richter
2019-11-06  9:33 ` [PATCH v2 12/20] EDAC, ghes: Unify trace_mc_event() code with edac_mc driver Robert Richter
2019-11-06  9:33 ` [PATCH v2 13/20] EDAC, Documentation: Describe CPER module definition and DIMM ranks Robert Richter
2019-11-06  9:33 ` Robert Richter [this message]
2019-11-09  7:40   ` [PATCH v2 14/20] EDAC, mc: Remove per layer counters Mauro Carvalho Chehab
2019-11-18 20:30     ` Robert Richter
2019-11-06  9:33 ` [PATCH v2 15/20] EDAC, mc: Split edac_mc_alloc() into smaller functions Robert Richter
2019-11-06  9:33 ` [PATCH v2 16/20] EDAC, mc: Reorder functions edac_mc_alloc*() Robert Richter
2019-11-06  9:33 ` [PATCH v2 17/20] EDAC, mc: Rework edac_raw_mc_handle_error() to use struct dimm_info Robert Richter
2019-11-06  9:33 ` [PATCH v2 18/20] EDAC: Store error type in struct edac_raw_error_desc Robert Richter
2019-11-09  7:30   ` Mauro Carvalho Chehab
2019-11-06  9:33 ` [PATCH v2 19/20] EDAC, mc: Determine mci pointer from the error descriptor Robert Richter
2019-11-06  9:33 ` [PATCH v2 20/20] EDAC, mc: Create new function edac_inc_csrow() Robert Richter
2019-11-09  7:32   ` Mauro Carvalho Chehab
2019-11-10 14:51 ` [PATCH v2 00/20] EDAC: Rework edac_mc and ghes drivers Borislav Petkov
2019-11-11 15:49   ` Robert Richter

Reply instructions:

You may reply publically to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191106093239.25517-15-rrichter@marvell.com \
    --to=rrichter@marvell.com \
    --cc=bp@alien8.de \
    --cc=james.morse@arm.com \
    --cc=linux-edac@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mchehab@kernel.org \
    --cc=tony.luck@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-EDAC Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-edac/0 linux-edac/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-edac linux-edac/ https://lore.kernel.org/linux-edac \
		linux-edac@vger.kernel.org
	public-inbox-index linux-edac

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-edac


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git