All of lore.kernel.org
 help / color / mirror / Atom feed
From: Luben Tuikov <luben.tuikov@amd.com>
To: amd-gfx@lists.freedesktop.org
Cc: Alexander Deucher <Alexander.Deucher@amd.com>,
	Luben Tuikov <luben.tuikov@amd.com>,
	John Clements <john.clements@amd.com>,
	Guchun Chen <guchun.chen@amd.com>,
	Hawking Zhang <Hawking.Zhang@amd.com>
Subject: [PATCH 36/40] drm/amdgpu: Use explicit cardinality for clarity
Date: Tue,  8 Jun 2021 17:39:50 -0400	[thread overview]
Message-ID: <20210608213954.5517-37-luben.tuikov@amd.com> (raw)
In-Reply-To: <20210608213954.5517-1-luben.tuikov@amd.com>

RAS_MAX_RECORD_NUM may mean the maximum record
number, as in the maximum house number on your
street, or it may mean the maximum number of
records, as in the count of records, which is also
a number. To make this distinction whether the
number is ordinal (index) or cardinal (count),
rename this macro to RAS_MAX_RECORD_COUNT.

This makes it easy to understand what it refers
to, especially when we compute quantities such as,
how many records do we have left in the table,
especially when there are so many other numbers,
quantities and numerical macros around.

Also rename the long,
amdgpu_ras_eeprom_get_record_max_length() to the
more succinct and clear,
amdgpu_ras_eeprom_max_record_count().

When computing the threshold, which also deals
with counts, i.e. "how many", use cardinal
"max_eeprom_records_count", than the quantitative
"max_eeprom_records_len".

Simplify the logic here and there, as well.

Cc: Guchun Chen <guchun.chen@amd.com>
Cc: John Clements <john.clements@amd.com>
Cc: Hawking Zhang <Hawking.Zhang@amd.com>
Cc: Alexander Deucher <Alexander.Deucher@amd.com>
Signed-off-by: Luben Tuikov <luben.tuikov@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c       |  9 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c       | 50 ++++++++-----------
 .../gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c    |  8 +--
 .../gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h    |  2 +-
 4 files changed, 30 insertions(+), 39 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 3de1accb060e37..0203f654576bcc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -853,11 +853,10 @@ MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = legac
 module_param_named(reset_method, amdgpu_reset_method, int, 0444);
 
 /**
- * DOC: bad_page_threshold (int)
- * Bad page threshold is to specify the threshold value of faulty pages
- * detected by RAS ECC, that may result in GPU entering bad status if total
- * faulty pages by ECC exceed threshold value and leave it for user's further
- * check.
+ * DOC: bad_page_threshold (int) Bad page threshold is specifies the
+ * threshold value of faulty pages detected by RAS ECC, which may
+ * result in the GPU entering bad status when the number of total
+ * faulty pages by ECC exceeds the threshold value.
  */
 MODULE_PARM_DESC(bad_page_threshold, "Bad page threshold(-1 = auto(default value), 0 = disable bad page retirement)");
 module_param_named(bad_page_threshold, amdgpu_bad_page_threshold, int, 0444);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 66c96c65e7eeb9..95ab400b641af0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -71,8 +71,8 @@ const char *ras_block_string[] = {
 /* inject address is 52 bits */
 #define	RAS_UMC_INJECT_ADDR_LIMIT	(0x1ULL << 52)
 
-/* typical ECC bad page rate(1 bad page per 100MB VRAM) */
-#define RAS_BAD_PAGE_RATE		(100 * 1024 * 1024ULL)
+/* typical ECC bad page rate is 1 bad page per 100MB VRAM */
+#define RAS_BAD_PAGE_COVER              (100 * 1024 * 1024ULL)
 
 enum amdgpu_ras_retire_page_reservation {
 	AMDGPU_RAS_RETIRE_PAGE_RESERVED,
@@ -1841,27 +1841,24 @@ int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev)
 static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev)
 {
 	struct amdgpu_ras_eeprom_control *control =
-					&adev->psp.ras.ras->eeprom_control;
-	struct eeprom_table_record *bps = NULL;
-	int ret = 0;
+		&adev->psp.ras.ras->eeprom_control;
+	struct eeprom_table_record *bps;
+	int ret;
 
 	/* no bad page record, skip eeprom access */
-	if (!control->num_recs || (amdgpu_bad_page_threshold == 0))
-		return ret;
+	if (control->num_recs == 0 || amdgpu_bad_page_threshold == 0)
+		return 0;
 
 	bps = kcalloc(control->num_recs, sizeof(*bps), GFP_KERNEL);
 	if (!bps)
 		return -ENOMEM;
 
-	if (amdgpu_ras_eeprom_read(control, bps, control->num_recs)) {
+	ret = amdgpu_ras_eeprom_read(control, bps, control->num_recs);
+	if (ret)
 		dev_err(adev->dev, "Failed to load EEPROM table records!");
-		ret = -EIO;
-		goto out;
-	}
-
-	ret = amdgpu_ras_add_bad_pages(adev, bps, control->num_recs);
+	else
+		ret = amdgpu_ras_add_bad_pages(adev, bps, control->num_recs);
 
-out:
 	kfree(bps);
 	return ret;
 }
@@ -1901,11 +1898,9 @@ static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
 }
 
 static void amdgpu_ras_validate_threshold(struct amdgpu_device *adev,
-					uint32_t max_length)
+					  uint32_t max_count)
 {
 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
-	int tmp_threshold = amdgpu_bad_page_threshold;
-	u64 val;
 
 	/*
 	 * Justification of value bad_page_cnt_threshold in ras structure
@@ -1926,18 +1921,15 @@ static void amdgpu_ras_validate_threshold(struct amdgpu_device *adev,
 	 *      take no effect.
 	 */
 
-	if (tmp_threshold < -1)
-		tmp_threshold = -1;
-	else if (tmp_threshold > max_length)
-		tmp_threshold = max_length;
+	if (amdgpu_bad_page_threshold < 0) {
+		u64 val = adev->gmc.mc_vram_size;
 
-	if (tmp_threshold == -1) {
-		val = adev->gmc.mc_vram_size;
-		do_div(val, RAS_BAD_PAGE_RATE);
+		do_div(val, RAS_BAD_PAGE_COVER);
 		con->bad_page_cnt_threshold = min(lower_32_bits(val),
-						max_length);
+						  max_count);
 	} else {
-		con->bad_page_cnt_threshold = tmp_threshold;
+		con->bad_page_cnt_threshold = min_t(int, max_count,
+						    amdgpu_bad_page_threshold);
 	}
 }
 
@@ -1945,7 +1937,7 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
 {
 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
 	struct ras_err_handler_data **data;
-	uint32_t max_eeprom_records_len = 0;
+	u32  max_eeprom_records_count = 0;
 	bool exc_err_limit = false;
 	int ret;
 
@@ -1965,8 +1957,8 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
 	atomic_set(&con->in_recovery, 0);
 	con->adev = adev;
 
-	max_eeprom_records_len = amdgpu_ras_eeprom_get_record_max_length();
-	amdgpu_ras_validate_threshold(adev, max_eeprom_records_len);
+	max_eeprom_records_count = amdgpu_ras_eeprom_max_record_count();
+	amdgpu_ras_validate_threshold(adev, max_eeprom_records_count);
 
 	/* Todo: During test the SMU might fail to read the eeprom through I2C
 	 * when the GPU is pending on XGMI reset during probe time
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
index 54ef31594accd9..21e1e59e4857ff 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
@@ -54,7 +54,7 @@
 #define RAS_TBL_SIZE_BYTES      (256 * 1024)
 #define RAS_HDR_START           0
 #define RAS_RECORD_START        (RAS_HDR_START + RAS_TABLE_HEADER_SIZE)
-#define RAS_MAX_RECORD_NUM      ((RAS_TBL_SIZE_BYTES - RAS_TABLE_HEADER_SIZE) \
+#define RAS_MAX_RECORD_COUNT    ((RAS_TBL_SIZE_BYTES - RAS_TABLE_HEADER_SIZE) \
 				 / RAS_TABLE_RECORD_SIZE)
 
 #define to_amdgpu_device(x) (container_of(x, struct amdgpu_ras, eeprom_control))->adev
@@ -532,7 +532,7 @@ static int amdgpu_ras_eeprom_xfer(struct amdgpu_ras_eeprom_control *control,
 		 * TODO - Check the assumption is correct
 		 */
 		control->num_recs += num;
-		control->num_recs %= RAS_MAX_RECORD_NUM;
+		control->num_recs %= RAS_MAX_RECORD_COUNT;
 		control->tbl_hdr.tbl_size += RAS_TABLE_RECORD_SIZE * num;
 		if (control->tbl_hdr.tbl_size > RAS_TBL_SIZE_BYTES)
 			control->tbl_hdr.tbl_size = RAS_TABLE_HEADER_SIZE +
@@ -568,9 +568,9 @@ int amdgpu_ras_eeprom_write(struct amdgpu_ras_eeprom_control *control,
 	return amdgpu_ras_eeprom_xfer(control, records, num, true);
 }
 
-inline uint32_t amdgpu_ras_eeprom_get_record_max_length(void)
+inline uint32_t amdgpu_ras_eeprom_max_record_count(void)
 {
-	return RAS_MAX_RECORD_NUM;
+	return RAS_MAX_RECORD_COUNT;
 }
 
 /* Used for testing if bugs encountered */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
index 4906ed9fb8cdd3..504729b8053759 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
@@ -88,7 +88,7 @@ int amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control,
 int amdgpu_ras_eeprom_write(struct amdgpu_ras_eeprom_control *control,
 			    struct eeprom_table_record *records, const u32 num);
 
-inline uint32_t amdgpu_ras_eeprom_get_record_max_length(void);
+inline uint32_t amdgpu_ras_eeprom_max_record_count(void);
 
 void amdgpu_ras_eeprom_test(struct amdgpu_ras_eeprom_control *control);
 
-- 
2.32.0

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

  parent reply	other threads:[~2021-06-08 21:41 UTC|newest]

Thread overview: 74+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-06-08 21:39 [PATCH 00/40] I2C fixes Luben Tuikov
2021-06-08 21:39 ` [PATCH 01/40] drm/amdgpu: add a mutex for the smu11 i2c bus (v2) Luben Tuikov
2021-06-08 21:39 ` [PATCH 02/40] drm/amdgpu/pm: rework i2c xfers on sienna cichlid (v3) Luben Tuikov
2021-06-08 21:39 ` [PATCH 03/40] drm/amdgpu/pm: rework i2c xfers on arcturus (v3) Luben Tuikov
2021-06-08 21:39 ` [PATCH 04/40] drm/amdgpu/pm: add smu i2c implementation for navi1x (v3) Luben Tuikov
2021-06-08 21:39 ` [PATCH 05/40] drm/amdgpu: add new helper for handling EEPROM i2c transfers Luben Tuikov
2021-06-08 21:39 ` [PATCH 06/40] drm/amdgpu/ras: switch ras eeprom handling to use generic helper Luben Tuikov
2021-06-08 21:39 ` [PATCH 07/40] drm/amdgpu/ras: switch fru eeprom handling to use generic helper (v2) Luben Tuikov
2021-06-08 21:39 ` [PATCH 08/40] drm/amdgpu: i2c subsystem uses 7 bit addresses Luben Tuikov
2021-06-08 21:39 ` [PATCH 09/40] drm/amdgpu: add I2C_CLASS_HWMON to SMU i2c buses Luben Tuikov
2021-06-08 21:39 ` [PATCH 10/40] drm/amdgpu: rework smu11 i2c for generic operation Luben Tuikov
2021-06-08 21:39 ` [PATCH 11/40] drm/amdgpu: only set restart on first cmd of the smu i2c transaction Luben Tuikov
2021-06-08 21:39 ` [PATCH 12/40] drm/amdgpu: Remember to wait 10ms for write buffer flush v2 Luben Tuikov
2021-06-08 21:39 ` [PATCH 13/40] dmr/amdgpu: Add RESTART handling also to smu_v11_0_i2c (VG20) Luben Tuikov
2021-06-10 20:18   ` Alex Deucher
2021-06-08 21:39 ` [PATCH 14/40] drm/amdgpu: Drop i > 0 restriction for issuing RESTART Luben Tuikov
2021-06-10 20:21   ` Alex Deucher
2021-06-08 21:39 ` [PATCH 15/40] drm/amdgpu: Send STOP for the last byte of msg only Luben Tuikov
2021-06-10 20:22   ` Alex Deucher
2021-06-08 21:39 ` [PATCH 16/40] drm/amd/pm: SMU I2C: Return number of messages processed Luben Tuikov
2021-06-10 20:25   ` Alex Deucher
2021-06-08 21:39 ` [PATCH 17/40] drm/amdgpu/pm: ADD I2C quirk adapter table Luben Tuikov
2021-06-10 20:26   ` Alex Deucher
2021-06-08 21:39 ` [PATCH 18/40] drm/amdgpu: Fix Vega20 I2C to be agnostic (v2) Luben Tuikov
2021-06-10 20:43   ` Alex Deucher
2021-06-08 21:39 ` [PATCH 19/40] drm/amdgpu: Fixes to the AMDGPU EEPROM driver Luben Tuikov
2021-06-10 20:53   ` Alex Deucher
2021-06-08 21:39 ` [PATCH 20/40] drm/amdgpu: EEPROM respects I2C quirks Luben Tuikov
2021-06-11 17:01   ` Alex Deucher
2021-06-11 17:17     ` Luben Tuikov
2021-06-11 17:37       ` Luben Tuikov
2021-06-08 21:39 ` [PATCH 21/40] drm/amdgpu: I2C EEPROM full memory addressing Luben Tuikov
2021-06-10 20:57   ` Alex Deucher
2021-06-08 21:39 ` [PATCH 22/40] drm/amdgpu: RAS and FRU now use 19-bit I2C address Luben Tuikov
2021-06-10 20:59   ` Alex Deucher
2021-06-08 21:39 ` [PATCH 23/40] drm/amdgpu: Fix wrap-around bugs in RAS Luben Tuikov
2021-06-10 21:00   ` Alex Deucher
2021-06-08 21:39 ` [PATCH 24/40] drm/amdgpu: I2C class is HWMON Luben Tuikov
2021-06-10 21:02   ` Alex Deucher
2021-06-08 21:39 ` [PATCH 25/40] drm/amdgpu: RAS: EEPROM --> RAS Luben Tuikov
2021-06-10 21:03   ` Alex Deucher
2021-06-08 21:39 ` [PATCH 26/40] drm/amdgpu: Rename misspelled function Luben Tuikov
2021-06-10 21:04   ` Alex Deucher
2021-06-08 21:39 ` [PATCH 27/40] drm/amdgpu: RAS xfer to read/write Luben Tuikov
2021-06-10 21:05   ` Alex Deucher
2021-06-08 21:39 ` [PATCH 28/40] drm/amdgpu: EEPROM: add explicit read and write Luben Tuikov
2021-06-10 21:06   ` Alex Deucher
2021-06-08 21:39 ` [PATCH 29/40] drm/amd/pm: Extend the I2C quirk table Luben Tuikov
2021-06-10 21:07   ` Alex Deucher
2021-06-08 21:39 ` [PATCH 30/40] drm/amd/pm: Simplify managed I2C transfer functions Luben Tuikov
2021-06-10 21:08   ` Alex Deucher
2021-06-08 21:39 ` [PATCH 31/40] drm/amdgpu: Fix width of I2C address Luben Tuikov
2021-06-10 21:09   ` Alex Deucher
2021-06-08 21:39 ` [PATCH 32/40] drm/amdgpu: Return result fix in RAS Luben Tuikov
2021-06-10 21:11   ` Alex Deucher
2021-06-08 21:39 ` [PATCH 33/40] drm/amd/pm: Fix a bug in i2c_xfer Luben Tuikov
2021-06-10 21:12   ` Alex Deucher
2021-06-10 22:26     ` Luben Tuikov
2021-06-08 21:39 ` [PATCH 34/40] drm/amdgpu: Fix amdgpu_ras_eeprom_init() Luben Tuikov
2021-06-10 21:12   ` Alex Deucher
2021-06-08 21:39 ` [PATCH 35/40] drm/amdgpu: Simplify RAS EEPROM checksum calculations Luben Tuikov
2021-06-11 17:07   ` Alex Deucher
2021-06-08 21:39 ` Luben Tuikov [this message]
2021-06-10 21:17   ` [PATCH 36/40] drm/amdgpu: Use explicit cardinality for clarity Alex Deucher
2021-06-08 21:39 ` [PATCH 37/40] drm/amdgpu: Optimizations to EEPROM RAS table I/O Luben Tuikov
2021-06-08 21:39 ` [PATCH 38/40] drm/amdgpu: RAS EEPROM table is now in debugfs Luben Tuikov
2021-06-11 17:16   ` Alex Deucher
2021-06-11 17:30     ` Luben Tuikov
2021-06-11 17:51       ` Alex Deucher
2021-06-11 18:06         ` Luben Tuikov
2021-06-08 21:39 ` [PATCH 39/40] drm/amdgpu: Fix koops when accessing RAS EEPROM Luben Tuikov
2021-06-10 21:23   ` Alex Deucher
2021-06-08 21:39 ` [PATCH 40/40] drm/amdgpu: Use a single loop Luben Tuikov
2021-06-10 21:25   ` Alex Deucher

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210608213954.5517-37-luben.tuikov@amd.com \
    --to=luben.tuikov@amd.com \
    --cc=Alexander.Deucher@amd.com \
    --cc=Hawking.Zhang@amd.com \
    --cc=amd-gfx@lists.freedesktop.org \
    --cc=guchun.chen@amd.com \
    --cc=john.clements@amd.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.