All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] mmc: cqhci: Avoid false "cqhci: CQE stuck on" by not open-coding timeout loop
@ 2020-04-13 23:27 Douglas Anderson
  2020-04-14 12:54 ` Adrian Hunter
  2020-04-17  9:30 ` Ulf Hansson
  0 siblings, 2 replies; 3+ messages in thread
From: Douglas Anderson @ 2020-04-13 23:27 UTC (permalink / raw)
  To: Adrian Hunter, Ritesh Harjani, Asutosh Das, Ulf Hansson,
	Venkat Gopalakrishnan
  Cc: linux-arm-msm, Douglas Anderson, Konstantin Dorfman,
	Linus Walleij, Subhash Jadavani, linux-kernel, linux-mmc

Open-coding a timeout loop invariably leads to errors with handling
the timeout properly in one corner case or another.  In the case of
cqhci we might report "CQE stuck on" even if it wasn't stuck on.
You'd just need this sequence of events to happen in cqhci_off():

1. Call ktime_get().
2. Something happens to interrupt the CPU for > 100 us (context switch
   or interrupt).
3. Check time and; set "timed_out" to true since > 100 us.
4. Read CQHCI_CTL.
5. Both "reg & CQHCI_HALT" and "timed_out" are true, so break.
6. Since "timed_out" is true, falsely print the error message.

Rather than fixing the polling loop, use readx_poll_timeout() like
many people do.  This has been time tested to handle the corner cases.

Fixes: a4080225f51d ("mmc: cqhci: support for command queue enabled host")
Signed-off-by: Douglas Anderson <dianders@chromium.org>
---

 drivers/mmc/host/cqhci.c | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/drivers/mmc/host/cqhci.c b/drivers/mmc/host/cqhci.c
index c2239ee2c0ef..75934f3c117e 100644
--- a/drivers/mmc/host/cqhci.c
+++ b/drivers/mmc/host/cqhci.c
@@ -5,6 +5,7 @@
 #include <linux/delay.h>
 #include <linux/highmem.h>
 #include <linux/io.h>
+#include <linux/iopoll.h>
 #include <linux/module.h>
 #include <linux/dma-mapping.h>
 #include <linux/slab.h>
@@ -349,12 +350,16 @@ static int cqhci_enable(struct mmc_host *mmc, struct mmc_card *card)
 /* CQHCI is idle and should halt immediately, so set a small timeout */
 #define CQHCI_OFF_TIMEOUT 100
 
+static u32 cqhci_read_ctl(struct cqhci_host *cq_host)
+{
+	return cqhci_readl(cq_host, CQHCI_CTL);
+}
+
 static void cqhci_off(struct mmc_host *mmc)
 {
 	struct cqhci_host *cq_host = mmc->cqe_private;
-	ktime_t timeout;
-	bool timed_out;
 	u32 reg;
+	int err;
 
 	if (!cq_host->enabled || !mmc->cqe_on || cq_host->recovery_halt)
 		return;
@@ -364,15 +369,9 @@ static void cqhci_off(struct mmc_host *mmc)
 
 	cqhci_writel(cq_host, CQHCI_HALT, CQHCI_CTL);
 
-	timeout = ktime_add_us(ktime_get(), CQHCI_OFF_TIMEOUT);
-	while (1) {
-		timed_out = ktime_compare(ktime_get(), timeout) > 0;
-		reg = cqhci_readl(cq_host, CQHCI_CTL);
-		if ((reg & CQHCI_HALT) || timed_out)
-			break;
-	}
-
-	if (timed_out)
+	err = readx_poll_timeout(cqhci_read_ctl, cq_host, reg,
+				 reg & CQHCI_HALT, 0, CQHCI_OFF_TIMEOUT);
+	if (err < 0)
 		pr_err("%s: cqhci: CQE stuck on\n", mmc_hostname(mmc));
 	else
 		pr_debug("%s: cqhci: CQE off\n", mmc_hostname(mmc));
-- 
2.26.0.110.g2183baf09c-goog


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH] mmc: cqhci: Avoid false "cqhci: CQE stuck on" by not open-coding timeout loop
  2020-04-13 23:27 [PATCH] mmc: cqhci: Avoid false "cqhci: CQE stuck on" by not open-coding timeout loop Douglas Anderson
@ 2020-04-14 12:54 ` Adrian Hunter
  2020-04-17  9:30 ` Ulf Hansson
  1 sibling, 0 replies; 3+ messages in thread
From: Adrian Hunter @ 2020-04-14 12:54 UTC (permalink / raw)
  To: Douglas Anderson, Ritesh Harjani, Asutosh Das, Ulf Hansson,
	Venkat Gopalakrishnan
  Cc: linux-arm-msm, Konstantin Dorfman, Linus Walleij,
	Subhash Jadavani, linux-kernel, linux-mmc

On 14/04/20 2:27 am, Douglas Anderson wrote:
> Open-coding a timeout loop invariably leads to errors with handling
> the timeout properly in one corner case or another.  In the case of
> cqhci we might report "CQE stuck on" even if it wasn't stuck on.
> You'd just need this sequence of events to happen in cqhci_off():
> 
> 1. Call ktime_get().
> 2. Something happens to interrupt the CPU for > 100 us (context switch
>    or interrupt).
> 3. Check time and; set "timed_out" to true since > 100 us.
> 4. Read CQHCI_CTL.
> 5. Both "reg & CQHCI_HALT" and "timed_out" are true, so break.
> 6. Since "timed_out" is true, falsely print the error message.
> 
> Rather than fixing the polling loop, use readx_poll_timeout() like
> many people do.  This has been time tested to handle the corner cases.
> 
> Fixes: a4080225f51d ("mmc: cqhci: support for command queue enabled host")
> Signed-off-by: Douglas Anderson <dianders@chromium.org>

Acked-by: Adrian Hunter <adrian.hunter@intel.com>


> ---
> 
>  drivers/mmc/host/cqhci.c | 21 ++++++++++-----------
>  1 file changed, 10 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/mmc/host/cqhci.c b/drivers/mmc/host/cqhci.c
> index c2239ee2c0ef..75934f3c117e 100644
> --- a/drivers/mmc/host/cqhci.c
> +++ b/drivers/mmc/host/cqhci.c
> @@ -5,6 +5,7 @@
>  #include <linux/delay.h>
>  #include <linux/highmem.h>
>  #include <linux/io.h>
> +#include <linux/iopoll.h>
>  #include <linux/module.h>
>  #include <linux/dma-mapping.h>
>  #include <linux/slab.h>
> @@ -349,12 +350,16 @@ static int cqhci_enable(struct mmc_host *mmc, struct mmc_card *card)
>  /* CQHCI is idle and should halt immediately, so set a small timeout */
>  #define CQHCI_OFF_TIMEOUT 100
>  
> +static u32 cqhci_read_ctl(struct cqhci_host *cq_host)
> +{
> +	return cqhci_readl(cq_host, CQHCI_CTL);
> +}
> +
>  static void cqhci_off(struct mmc_host *mmc)
>  {
>  	struct cqhci_host *cq_host = mmc->cqe_private;
> -	ktime_t timeout;
> -	bool timed_out;
>  	u32 reg;
> +	int err;
>  
>  	if (!cq_host->enabled || !mmc->cqe_on || cq_host->recovery_halt)
>  		return;
> @@ -364,15 +369,9 @@ static void cqhci_off(struct mmc_host *mmc)
>  
>  	cqhci_writel(cq_host, CQHCI_HALT, CQHCI_CTL);
>  
> -	timeout = ktime_add_us(ktime_get(), CQHCI_OFF_TIMEOUT);
> -	while (1) {
> -		timed_out = ktime_compare(ktime_get(), timeout) > 0;
> -		reg = cqhci_readl(cq_host, CQHCI_CTL);
> -		if ((reg & CQHCI_HALT) || timed_out)
> -			break;
> -	}
> -
> -	if (timed_out)
> +	err = readx_poll_timeout(cqhci_read_ctl, cq_host, reg,
> +				 reg & CQHCI_HALT, 0, CQHCI_OFF_TIMEOUT);
> +	if (err < 0)
>  		pr_err("%s: cqhci: CQE stuck on\n", mmc_hostname(mmc));
>  	else
>  		pr_debug("%s: cqhci: CQE off\n", mmc_hostname(mmc));
> 


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] mmc: cqhci: Avoid false "cqhci: CQE stuck on" by not open-coding timeout loop
  2020-04-13 23:27 [PATCH] mmc: cqhci: Avoid false "cqhci: CQE stuck on" by not open-coding timeout loop Douglas Anderson
  2020-04-14 12:54 ` Adrian Hunter
@ 2020-04-17  9:30 ` Ulf Hansson
  1 sibling, 0 replies; 3+ messages in thread
From: Ulf Hansson @ 2020-04-17  9:30 UTC (permalink / raw)
  To: Douglas Anderson
  Cc: Adrian Hunter, Ritesh Harjani, Asutosh Das,
	Venkat Gopalakrishnan, linux-arm-msm, Konstantin Dorfman,
	Linus Walleij, Subhash Jadavani, Linux Kernel Mailing List,
	linux-mmc

On Tue, 14 Apr 2020 at 01:27, Douglas Anderson <dianders@chromium.org> wrote:
>
> Open-coding a timeout loop invariably leads to errors with handling
> the timeout properly in one corner case or another.  In the case of
> cqhci we might report "CQE stuck on" even if it wasn't stuck on.
> You'd just need this sequence of events to happen in cqhci_off():
>
> 1. Call ktime_get().
> 2. Something happens to interrupt the CPU for > 100 us (context switch
>    or interrupt).
> 3. Check time and; set "timed_out" to true since > 100 us.
> 4. Read CQHCI_CTL.
> 5. Both "reg & CQHCI_HALT" and "timed_out" are true, so break.
> 6. Since "timed_out" is true, falsely print the error message.
>
> Rather than fixing the polling loop, use readx_poll_timeout() like
> many people do.  This has been time tested to handle the corner cases.
>
> Fixes: a4080225f51d ("mmc: cqhci: support for command queue enabled host")
> Signed-off-by: Douglas Anderson <dianders@chromium.org>

Applied for fixes, and by adding a stable tag, thanks!

Kind regards
Uffe


> ---
>
>  drivers/mmc/host/cqhci.c | 21 ++++++++++-----------
>  1 file changed, 10 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/mmc/host/cqhci.c b/drivers/mmc/host/cqhci.c
> index c2239ee2c0ef..75934f3c117e 100644
> --- a/drivers/mmc/host/cqhci.c
> +++ b/drivers/mmc/host/cqhci.c
> @@ -5,6 +5,7 @@
>  #include <linux/delay.h>
>  #include <linux/highmem.h>
>  #include <linux/io.h>
> +#include <linux/iopoll.h>
>  #include <linux/module.h>
>  #include <linux/dma-mapping.h>
>  #include <linux/slab.h>
> @@ -349,12 +350,16 @@ static int cqhci_enable(struct mmc_host *mmc, struct mmc_card *card)
>  /* CQHCI is idle and should halt immediately, so set a small timeout */
>  #define CQHCI_OFF_TIMEOUT 100
>
> +static u32 cqhci_read_ctl(struct cqhci_host *cq_host)
> +{
> +       return cqhci_readl(cq_host, CQHCI_CTL);
> +}
> +
>  static void cqhci_off(struct mmc_host *mmc)
>  {
>         struct cqhci_host *cq_host = mmc->cqe_private;
> -       ktime_t timeout;
> -       bool timed_out;
>         u32 reg;
> +       int err;
>
>         if (!cq_host->enabled || !mmc->cqe_on || cq_host->recovery_halt)
>                 return;
> @@ -364,15 +369,9 @@ static void cqhci_off(struct mmc_host *mmc)
>
>         cqhci_writel(cq_host, CQHCI_HALT, CQHCI_CTL);
>
> -       timeout = ktime_add_us(ktime_get(), CQHCI_OFF_TIMEOUT);
> -       while (1) {
> -               timed_out = ktime_compare(ktime_get(), timeout) > 0;
> -               reg = cqhci_readl(cq_host, CQHCI_CTL);
> -               if ((reg & CQHCI_HALT) || timed_out)
> -                       break;
> -       }
> -
> -       if (timed_out)
> +       err = readx_poll_timeout(cqhci_read_ctl, cq_host, reg,
> +                                reg & CQHCI_HALT, 0, CQHCI_OFF_TIMEOUT);
> +       if (err < 0)
>                 pr_err("%s: cqhci: CQE stuck on\n", mmc_hostname(mmc));
>         else
>                 pr_debug("%s: cqhci: CQE off\n", mmc_hostname(mmc));
> --
> 2.26.0.110.g2183baf09c-goog
>

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2020-04-17  9:31 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-04-13 23:27 [PATCH] mmc: cqhci: Avoid false "cqhci: CQE stuck on" by not open-coding timeout loop Douglas Anderson
2020-04-14 12:54 ` Adrian Hunter
2020-04-17  9:30 ` Ulf Hansson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.