All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andrei Kartashev <a.kartashev@yadro.com>
To: Eddie James <eajames@linux.ibm.com>, <openbmc@lists.ozlabs.org>
Subject: Re: [PATCH linux dev-5.10 33/35] pmbus: (core) Add a one-shot retry in pmbus_set_page()
Date: Tue, 9 Mar 2021 23:21:23 +0300	[thread overview]
Message-ID: <ee1c9f78d721fd52d62087674ee282d73e7237de.camel@yadro.com> (raw)
In-Reply-To: <20210308225419.46530-34-eajames@linux.ibm.com>

Hi,
I have a similar patch in our local tree, but it adds retry in more
places. I had to add retries for all i2c_smbus_* operations because pf
communication to PSUs sometime very unstable.
Here is it:

From 7688b90c3e7e4986535a194a271509095534c3e7 Mon Sep 17 00:00:00 2001
From: Andrei Kartashev <a.kartashev@yadro.com>
Date: Tue, 9 Mar 2021 21:47:25 +0300
Subject: [PATCH] hwmon: (pmbus) Retry I2C request on failure

In real world I2C communication errors are possible. It was discovered
that pmbus read operation for some PSUs occasionally fails in random
places. For pmbus_set_page call there is already retry implemented, but
seems it is not enough.

Add retries for every i2c_smbus_read/i2c_smbus_write call to increase
robust.

Signed-off-by: Andrei Kartashev <a.kartashev@yadro.com>
---
 drivers/hwmon/pmbus/pmbus_core.c | 65 +++++++++++++++++++-------------
 1 file changed, 38 insertions(+), 27 deletions(-)

diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c
index 60ea917936a7..d98b52950022 100644
--- a/drivers/hwmon/pmbus/pmbus_core.c
+++ b/drivers/hwmon/pmbus/pmbus_core.c
@@ -27,6 +27,7 @@
  */
 #define PMBUS_ATTR_ALLOC_SIZE	32
 #define PMBUS_NAME_SIZE		24
+#define I2C_RETRIES 3
 
 struct pmbus_sensor {
 	struct pmbus_sensor *next;
@@ -144,7 +145,7 @@ EXPORT_SYMBOL_GPL(pmbus_clear_cache);
 int pmbus_set_page(struct i2c_client *client, int page, int phase)
 {
 	struct pmbus_data *data = i2c_get_clientdata(client);
-	int rv;
+	int rv, rtr;
 
 	if (page < 0)
 		return 0;
@@ -154,18 +155,12 @@ int pmbus_set_page(struct i2c_client *client, int page, int phase)
 		dev_dbg(&client->dev, "Want page %u, %u cached\n", page,
 			data->currpage);
 
-		rv = i2c_smbus_write_byte_data(client, PMBUS_PAGE, page);
-		if (rv < 0) {
+		for (rtr = 0, rv = -1; (rtr < I2C_RETRIES) && (rv < 0); rtr++)
 			rv = i2c_smbus_write_byte_data(client, PMBUS_PAGE,
 						       page);
-			dev_dbg(&client->dev,
-				"Failed to set page %u, performed one-shot retry %s: %d\n",
-				page, rv ? "and failed" : "with success", rv);
-			if (rv < 0)
-				return rv;
-		}
 
-		rv = i2c_smbus_read_byte_data(client, PMBUS_PAGE);
+		for (rtr = 0, rv = -1; (rtr < I2C_RETRIES) && (rv < 0); rtr++)
+			rv = i2c_smbus_read_byte_data(client, PMBUS_PAGE);
 		if (rv < 0)
 			return rv;
 
@@ -176,8 +171,9 @@ int pmbus_set_page(struct i2c_client *client, int page, int phase)
 
 	if (data->info->phases[page] && data->currphase != phase &&
 	    !(data->info->func[page] & PMBUS_PHASE_VIRTUAL)) {
-		rv = i2c_smbus_write_byte_data(client, PMBUS_PHASE,
-					       phase);
+		for (rtr = 0, rv = -1; (rtr < I2C_RETRIES) && (rv < 0); rtr++)
+			rv = i2c_smbus_write_byte_data(client, PMBUS_PHASE,
+						       phase);
 		if (rv)
 			return rv;
 	}
@@ -189,13 +185,15 @@ EXPORT_SYMBOL_GPL(pmbus_set_page);
 
 int pmbus_write_byte(struct i2c_client *client, int page, u8 value)
 {
-	int rv;
+	int rv, rtr;
 
 	rv = pmbus_set_page(client, page, 0xff);
 	if (rv < 0)
 		return rv;
 
-	return i2c_smbus_write_byte(client, value);
+	for (rtr = 0, rv = -1; (rtr < I2C_RETRIES) && (rv < 0); rtr++)
+		rv = i2c_smbus_write_byte(client, value);
+	return rv;
 }
 EXPORT_SYMBOL_GPL(pmbus_write_byte);
 
@@ -220,13 +218,15 @@ static int _pmbus_write_byte(struct i2c_client *client, int page, u8 value)
 int pmbus_write_word_data(struct i2c_client *client, int page, u8 reg,
 			  u16 word)
 {
-	int rv;
+	int rv, rtr;
 
 	rv = pmbus_set_page(client, page, 0xff);
 	if (rv < 0)
 		return rv;
 
-	return i2c_smbus_write_word_data(client, reg, word);
+	for (rtr = 0, rv = -1; (rtr < I2C_RETRIES) && (rv < 0); rtr++)
+		rv = i2c_smbus_write_word_data(client, reg, word);
+	return rv;
 }
 EXPORT_SYMBOL_GPL(pmbus_write_word_data);
 
@@ -302,13 +302,15 @@ EXPORT_SYMBOL_GPL(pmbus_update_fan);
 
 int pmbus_read_word_data(struct i2c_client *client, int page, int phase, u8 reg)
 {
-	int rv;
+	int rv, rtr;
 
 	rv = pmbus_set_page(client, page, phase);
 	if (rv < 0)
 		return rv;
 
-	return i2c_smbus_read_word_data(client, reg);
+	for (rtr = 0, rv = -1; (rtr < I2C_RETRIES) && (rv < 0); rtr++)
+		rv = i2c_smbus_read_word_data(client, reg);
+	return rv;
 }
 EXPORT_SYMBOL_GPL(pmbus_read_word_data);
 
@@ -361,25 +363,29 @@ static int __pmbus_read_word_data(struct i2c_client *client, int page, int reg)
 
 int pmbus_read_byte_data(struct i2c_client *client, int page, u8 reg)
 {
-	int rv;
+	int rv, rtr;
 
 	rv = pmbus_set_page(client, page, 0xff);
 	if (rv < 0)
 		return rv;
 
-	return i2c_smbus_read_byte_data(client, reg);
+	for (rtr = 0, rv = -1; (rtr < I2C_RETRIES) && (rv < 0); rtr++)
+		rv = i2c_smbus_read_byte_data(client, reg);
+	return rv;
 }
 EXPORT_SYMBOL_GPL(pmbus_read_byte_data);
 
 int pmbus_write_byte_data(struct i2c_client *client, int page, u8 reg, u8 value)
 {
-	int rv;
+	int rv, rtr;
 
 	rv = pmbus_set_page(client, page, 0xff);
 	if (rv < 0)
 		return rv;
 
-	return i2c_smbus_write_byte_data(client, reg, value);
+	for (rtr = 0, rv = -1; (rtr < I2C_RETRIES) && (rv < 0); rtr++)
+		rv = i2c_smbus_write_byte_data(client, reg, value);
+	return rv;
 }
 EXPORT_SYMBOL_GPL(pmbus_write_byte_data);
 
@@ -2193,7 +2199,7 @@ static int pmbus_init_common(struct i2c_client *client, struct pmbus_data *data,
 			     struct pmbus_driver_info *info)
 {
 	struct device *dev = &client->dev;
-	int page, ret;
+	int page, ret, rtr;
 
 	/*
 	 * Some PMBus chips don't support PMBUS_STATUS_WORD, so try
@@ -2201,10 +2207,13 @@ static int pmbus_init_common(struct i2c_client *client, struct pmbus_data *data,
 	 * Bail out if both registers are not supported.
 	 */
 	data->read_status = pmbus_read_status_word;
-	ret = i2c_smbus_read_word_data(client, PMBUS_STATUS_WORD);
+	for (rtr = 0, ret = -1; (rtr < I2C_RETRIES) && (ret < 0); rtr++)
+		ret = i2c_smbus_read_word_data(client, PMBUS_STATUS_WORD);
 	if (ret < 0 || ret == 0xffff) {
 		data->read_status = pmbus_read_status_byte;
-		ret = i2c_smbus_read_byte_data(client, PMBUS_STATUS_BYTE);
+		for (rtr = 0, ret = -1; (rtr < I2C_RETRIES) && (ret < 0); rtr++)
+			ret = i2c_smbus_read_byte_data(client,
+						       PMBUS_STATUS_BYTE);
 		if (ret < 0 || ret == 0xff) {
 			dev_err(dev, "PMBus status register not found\n");
 			return -ENODEV;
@@ -2214,7 +2223,8 @@ static int pmbus_init_common(struct i2c_client *client, struct pmbus_data *data,
 	}
 
 	/* Enable PEC if the controller supports it */
-	ret = i2c_smbus_read_byte_data(client, PMBUS_CAPABILITY);
+	for (rtr = 0, ret = -1; (rtr < I2C_RETRIES) && (ret < 0); rtr++)
+		ret = i2c_smbus_read_byte_data(client, PMBUS_CAPABILITY);
 	if (ret >= 0 && (ret & PB_CAPABILITY_ERROR_CHECK))
 		client->flags |= I2C_CLIENT_PEC;
 
@@ -2223,7 +2233,8 @@ static int pmbus_init_common(struct i2c_client *client, struct pmbus_data *data,
 	 * faults, and we should not try it. Also, in that case, writes into
 	 * limit registers need to be disabled.
 	 */
-	ret = i2c_smbus_read_byte_data(client, PMBUS_WRITE_PROTECT);
+	for (rtr = 0, ret = -1; (rtr < I2C_RETRIES) && (ret < 0); rtr++)
+		ret = i2c_smbus_read_byte_data(client, PMBUS_WRITE_PROTECT);
 	if (ret > 0 && (ret & PB_WP_ANY))
 		data->flags |= PMBUS_WRITE_PROTECTED | PMBUS_SKIP_STATUS_CHECK;
 
-- 
2.26.2




On Mon, 2021-03-08 at 16:54 -0600, Eddie James wrote:
> From: Andrew Jeffery <andrew@aj.id.au>
> 
> From extensive testing and tracing it was discovered that the
> MAX31785
> occasionally fails to switch pages despite ACK'ing the PAGE PMBus
> data
> write. I suspect this behaviour had been seen on other devices as
> well,
> as pmbus_set_page() already read-back the freshly set value and
> errored
> out if it wasn't what we requested.
> 
> In the case of the MAX31785 it was shown that a one-shot retry was
> enough to get the PAGE write to stick if the inital command failed.
> To
> improve robustness, only error out if the one-shot retry also fails
> to
> stick.
> 
> OpenBMC-Staging-Count: 1
> Signed-off-by: Andrew Jeffery <andrew@aj.id.au>
> Signed-off-by: Joel Stanley <joel@jms.id.au>
> ---
>  drivers/hwmon/pmbus/pmbus_core.c | 31 ++++++++++++++++++++--------
> ---
>  1 file changed, 20 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/hwmon/pmbus/pmbus_core.c
> b/drivers/hwmon/pmbus/pmbus_core.c
> index 44c1a0a07509..dd4a09d18730 100644
> --- a/drivers/hwmon/pmbus/pmbus_core.c
> +++ b/drivers/hwmon/pmbus/pmbus_core.c
> @@ -151,25 +151,34 @@ int pmbus_set_page(struct i2c_client *client,
> int page, int phase)
>  
>  	if (!(data->info->func[page] & PMBUS_PAGE_VIRTUAL) &&
>  	    data->info->pages > 1 && page != data->currpage) {
> +		int i;
> +
>  		dev_dbg(&client->dev, "Want page %u, %u cached\n",
> page,
>  			data->currpage);
>  
> -		rv = i2c_smbus_write_byte_data(client, PMBUS_PAGE,
> page);
> -		if (rv < 0) {
> +		for (i = 0; i < 2; i++) {
>  			rv = i2c_smbus_write_byte_data(client,
> PMBUS_PAGE,
>  						       page);
> -			dev_dbg(&client->dev,
> -				"Failed to set page %u, performed one-
> shot retry %s: %d\n",
> -				page, rv ? "and failed" : "with
> success", rv);
> +			if (rv)
> +				continue;
> +
> +			rv = i2c_smbus_read_byte_data(client,
> PMBUS_PAGE);
>  			if (rv < 0)
> -				return rv;
> -		}
> +				continue;
>  
> -		rv = i2c_smbus_read_byte_data(client, PMBUS_PAGE);
> -		if (rv < 0)
> -			return rv;
> +			/* Success, exit loop */
> +			if (rv == page)
> +				break;
> +
> +			rv = i2c_smbus_read_byte_data(client,
> PMBUS_STATUS_CML);
> +			if (rv < 0)
> +				continue;
> +
> +			if (rv & PB_CML_FAULT_INVALID_DATA)
> +				return -EIO;
> +		}
>  
> -		if (rv != page)
> +		if (i == 2)
>  			return -EIO;
>  	}
>  	data->currpage = page;
-- 
Best regards,
Andrei Kartashev



  reply	other threads:[~2021-03-09 20:21 UTC|newest]

Thread overview: 64+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-03-08 22:53 [PATCH linux dev-5.10 00/35] Rainier and Everest system updates Eddie James
2021-03-08 22:53 ` [PATCH linux dev-5.10 01/35] ARM: dts: aspeed: rainier: Add Operator Panel LEDs Eddie James
2021-03-12  0:05   ` Joel Stanley
2021-03-08 22:53 ` [PATCH linux dev-5.10 02/35] ARM: dts: aspeed: rainier: Add directly controlled LEDs Eddie James
2021-03-12  0:04   ` Joel Stanley
2021-03-08 22:53 ` [PATCH linux dev-5.10 03/35] ARM: dts: aspeed: rainier: Add gpio-keys-polled for fans Eddie James
2021-03-12  0:06   ` Joel Stanley
2021-03-08 22:53 ` [PATCH linux dev-5.10 04/35] ARM: dts: aspeed: rainier: Set MAX31785 config Eddie James
2021-03-12  0:07   ` Joel Stanley
2021-03-08 22:53 ` [PATCH linux dev-5.10 05/35] ARM: dts: aspeed: rainier: Add additional processor CFAMs Eddie James
2021-03-12  0:07   ` Joel Stanley
2021-03-08 22:53 ` [PATCH linux dev-5.10 06/35] ARM: dts: aspeed: rainier: Add leds that are off PCA9552 Eddie James
2021-03-12  0:09   ` Joel Stanley
2021-03-12  0:21   ` Milton Miller II
2021-03-12  0:30     ` Joel Stanley
     [not found]       ` <6ACEC474-8CFD-4BA9-B8FF-CCD41007AA67@linux.vnet.ibm.com>
2021-03-24 23:43         ` Joel Stanley
2021-04-26  5:59           ` vishwanatha subbanna
2021-04-26  5:59             ` vishwanatha subbanna
2021-04-27 21:22             ` Jacek Anaszewski
2021-04-27 21:22               ` Jacek Anaszewski
2021-04-28  7:30               ` vishwanatha subbanna
2021-03-08 22:53 ` [PATCH linux dev-5.10 07/35] ARM: dts: aspeed: rainier: Add leds that are off pic16f882 Eddie James
2021-03-12  0:10   ` Joel Stanley
2021-03-08 22:53 ` [PATCH linux dev-5.10 08/35] ARM: dts: aspeed: rainier: Add leds on optional DASD cards Eddie James
2021-03-12  0:10   ` Joel Stanley
2021-03-08 22:53 ` [PATCH linux dev-5.10 09/35] ARM: dts: aspeed: rainier: Add leds that are on optional PCI cable cards Eddie James
2021-03-12  0:11   ` Joel Stanley
2021-03-08 22:53 ` [PATCH linux dev-5.10 10/35] ARM: dts: aspeed: rainier: Add presence GPIOs Eddie James
2021-03-12  0:14   ` Joel Stanley
2021-03-08 22:53 ` [PATCH linux dev-5.10 11/35] ARM: dts: aspeed: rainier: Mark controllers as restricted Eddie James
2021-03-12  0:15   ` Joel Stanley
2021-03-08 22:53 ` [PATCH linux dev-5.10 12/35] ARM: dts: aspeed: rainier 4U: Fix fan configuration Eddie James
2021-03-12  0:17   ` Joel Stanley
2021-03-08 22:53 ` [PATCH linux dev-5.10 13/35] dt: bindings: mmc: Add phase control properties for the Aspeed SDHCI Eddie James
2021-03-12  0:19   ` Joel Stanley
2021-04-12  3:21     ` Andrew Jeffery
2021-03-08 22:53 ` [PATCH linux dev-5.10 14/35] mmc: sdhci: aspeed: Expose data sample phase delay tuning Eddie James
2021-03-08 22:53 ` [PATCH linux dev-5.10 15/35] ARM: dts: aspeed: tacoma: Add data sample phase delay for eMMC Eddie James
2021-03-08 22:54 ` [PATCH linux dev-5.10 16/35] ARM: dts: aspeed: tacoma: Remove CFAM reset GPIO Eddie James
2021-03-08 22:54 ` [PATCH linux dev-5.10 17/35] ARM: dts: aspeed: Everest: Add I2C components Eddie James
2021-03-08 22:54 ` [PATCH linux dev-5.10 18/35] ARM: dts: Aspeed: Everest: Add max31785 fan controller device Eddie James
2021-03-08 22:54 ` [PATCH linux dev-5.10 19/35] ARM: dts: Aspeed: Everest: Add FSI CFAMs and re-number engines Eddie James
2021-03-08 22:54 ` [PATCH linux dev-5.10 20/35] ARM: dts: Aspeed: Everest: Add pca9552 fan presence Eddie James
2021-03-08 22:54 ` [PATCH linux dev-5.10 21/35] ARM: dts: aspeed: everest: Add power supply i2c devices Eddie James
2021-03-08 22:54 ` [PATCH linux dev-5.10 22/35] ARM: dts: aspeed: everest: Add UCD90320 power sequencer Eddie James
2021-03-08 22:54 ` [PATCH linux dev-5.10 23/35] ARM: dts: aspeed: everest: GPIOs support Eddie James
2021-03-08 22:54 ` [PATCH linux dev-5.10 24/35] ARM: dts: Aspeed: Everest: Add RTC Eddie James
2021-03-08 22:54 ` [PATCH linux dev-5.10 25/35] ARM: dts: aspeed: rainier: Support pass 2 planar Eddie James
2021-03-08 22:54 ` [PATCH linux dev-5.10 26/35] fsi: scom: Handle FSI2PIB timeout Eddie James
2021-03-12  0:20   ` Joel Stanley
2021-03-08 22:54 ` [PATCH linux dev-5.10 27/35] net/ncsi: Avoid channel_monitor hrtimer deadlock Eddie James
2021-03-12  0:35   ` Joel Stanley
2021-03-08 22:54 ` [PATCH linux dev-5.10 28/35] ftgmac100: Restart MAC HW once Eddie James
2021-03-08 22:54 ` [PATCH linux dev-5.10 29/35] hwmon: (pmbus) Add a PMBUS_NO_CAPABILITY platform data flag Eddie James
2021-03-08 22:54 ` [PATCH linux dev-5.10 30/35] hwmon: (pmbus/ibm-cffps) Set the PMBUS_NO_CAPABILITY flag Eddie James
2021-03-12  0:23   ` Joel Stanley
2021-03-08 22:54 ` [PATCH linux dev-5.10 31/35] i2c: Allow throttling of transfers to client devices Eddie James
2021-03-08 22:54 ` [PATCH linux dev-5.10 32/35] pmbus: (ucd9000) Throttle SMBus transfers to avoid poor behaviour Eddie James
2021-03-08 22:54 ` [PATCH linux dev-5.10 33/35] pmbus: (core) Add a one-shot retry in pmbus_set_page() Eddie James
2021-03-09 20:21   ` Andrei Kartashev [this message]
2021-03-12  0:35   ` Joel Stanley
2021-03-08 22:54 ` [PATCH linux dev-5.10 34/35] pmbus: (max31785) Add a local pmbus_set_page() implementation Eddie James
2021-03-08 22:54 ` [PATCH linux dev-5.10 35/35] pmbus: (max31785) Retry enabling fans after writing MFR_FAN_CONFIG Eddie James
2021-03-12  0:37 ` [PATCH linux dev-5.10 00/35] Rainier and Everest system updates Joel Stanley

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ee1c9f78d721fd52d62087674ee282d73e7237de.camel@yadro.com \
    --to=a.kartashev@yadro.com \
    --cc=eajames@linux.ibm.com \
    --cc=openbmc@lists.ozlabs.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.