All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] mtd: cfi_cmdset_0002: Change erase functions to retry for error
@ 2018-05-08 10:20 IKEGAMI Tokunori
  2018-05-08 11:20 ` Boris Brezillon
  0 siblings, 1 reply; 13+ messages in thread
From: IKEGAMI Tokunori @ 2018-05-08 10:20 UTC (permalink / raw)
  To: Brian Norris
  Cc: IKEGAMI Tokunori, PACKHAM Chris, David Woodhouse,
	Boris Brezillon, Marek Vasut, Richard Weinberger,
	Cyrille Pitchen, linux-mtd

From: Tokunori Ikegami <ikegami@allied-telesis.co.jp>

For the word write functions it is retried for error.
But it is not implemented to retry for the erase functions.
To make sure for the erase functions change to retry as same.

Signed-off-by: Tokunori Ikegami <ikegami@allied-telesis.co.jp>
Cc: Chris Packham <chris.packham@alliedtelesis.co.nz>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Boris Brezillon <boris.brezillon@free-electrons.com>
Cc: Marek Vasut <marek.vasut@gmail.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Cyrille Pitchen <cyrille.pitchen@wedev4u.fr>
Cc: linux-mtd@lists.infradead.org
---
 drivers/mtd/chips/cfi_cmdset_0002.c | 36 +++++++++++++++++++++++-------------
 1 file changed, 23 insertions(+), 13 deletions(-)

diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c
index 56aa6b75213d..e2dc020cd102 100644
--- a/drivers/mtd/chips/cfi_cmdset_0002.c
+++ b/drivers/mtd/chips/cfi_cmdset_0002.c
@@ -42,7 +42,7 @@
 #define AMD_BOOTLOC_BUG
 #define FORCE_WORD_WRITE 0
 
-#define MAX_WORD_RETRIES 3
+#define MAX_WRITE_RETRIES 3
 
 #define SST49LF004B	        0x0060
 #define SST49LF040B	        0x0050
@@ -207,7 +207,7 @@ static void fixup_use_write_buffers(struct mtd_info *mtd)
 	struct map_info *map = mtd->priv;
 	struct cfi_private *cfi = map->fldrv_priv;
 	if (cfi->cfiq->BufWriteTimeoutTyp) {
-		pr_debug("Using buffer write method\n" );
+		pr_debug("Using buffer write method\n");
 		mtd->_write = cfi_amdstd_write_buffers;
 	}
 }
@@ -1577,7 +1577,7 @@ static int __xipram do_write_oneword(struct map_info *map, struct flchip *chip,
 	}
 
 	pr_debug("MTD %s(): WRITE 0x%.8lx(0x%.8lx)\n",
-	       __func__, adr, datum.x[0] );
+	       __func__, adr, datum.x[0]);
 
 	if (mode == FL_OTP_WRITE)
 		otp_enter(map, chip, adr, map_bankwidth(map));
@@ -1643,10 +1643,10 @@ static int __xipram do_write_oneword(struct map_info *map, struct flchip *chip,
 	/* Did we succeed? */
 	if (!chip_good(map, adr, datum)) {
 		/* reset on all failures. */
-		map_write( map, CMD(0xF0), chip->start );
+		map_write(map, CMD(0xF0), chip->start);
 		/* FIXME - should have reset delay before continuing */
 
-		if (++retry_cnt <= MAX_WORD_RETRIES)
+		if (++retry_cnt <= MAX_WRITE_RETRIES)
 			goto retry;
 
 		ret = -EIO;
@@ -1821,7 +1821,7 @@ static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip,
 	datum = map_word_load(map, buf);
 
 	pr_debug("MTD %s(): WRITE 0x%.8lx(0x%.8lx)\n",
-	       __func__, adr, datum.x[0] );
+	       __func__, adr, datum.x[0]);
 
 	XIP_INVAL_CACHED_RANGE(map, adr, len);
 	ENABLE_VPP(map);
@@ -2105,7 +2105,7 @@ static int do_panic_write_oneword(struct map_info *map, struct flchip *chip,
 		map_write(map, CMD(0xF0), chip->start);
 		/* FIXME - should have reset delay before continuing */
 
-		if (++retry_cnt <= MAX_WORD_RETRIES)
+		if (++retry_cnt <= MAX_WRITE_RETRIES)
 			goto retry;
 
 		ret = -EIO;
@@ -2240,6 +2240,7 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip)
 	unsigned long int adr;
 	DECLARE_WAITQUEUE(wait, current);
 	int ret = 0;
+	int retry_cnt = 0;
 
 	adr = cfi->addr_unlock1;
 
@@ -2251,12 +2252,13 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip)
 	}
 
 	pr_debug("MTD %s(): ERASE 0x%.8lx\n",
-	       __func__, chip->start );
+	       __func__, chip->start);
 
 	XIP_INVAL_CACHED_RANGE(map, adr, map->size);
 	ENABLE_VPP(map);
 	xip_disable(map, chip, adr);
 
+ retry:
 	cfi_send_gen_cmd(0xAA, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL);
 	cfi_send_gen_cmd(0x55, cfi->addr_unlock2, chip->start, map, cfi, cfi->device_type, NULL);
 	cfi_send_gen_cmd(0x80, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL);
@@ -2297,7 +2299,7 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip)
 
 		if (time_after(jiffies, timeo)) {
 			printk(KERN_WARNING "MTD %s(): software timeout\n",
-				__func__ );
+				__func__);
 			break;
 		}
 
@@ -2307,9 +2309,12 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip)
 	/* Did we succeed? */
 	if (!chip_good(map, adr, map_word_ff(map))) {
 		/* reset on all failures. */
-		map_write( map, CMD(0xF0), chip->start );
+		map_write(map, CMD(0xF0), chip->start);
 		/* FIXME - should have reset delay before continuing */
 
+		if (++retry_cnt <= MAX_WRITE_RETRIES)
+			goto retry;
+
 		ret = -EIO;
 	}
 
@@ -2329,6 +2334,7 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip,
 	unsigned long timeo = jiffies + HZ;
 	DECLARE_WAITQUEUE(wait, current);
 	int ret = 0;
+	int retry_cnt = 0;
 
 	adr += chip->start;
 
@@ -2340,12 +2346,13 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip,
 	}
 
 	pr_debug("MTD %s(): ERASE 0x%.8lx\n",
-	       __func__, adr );
+	       __func__, adr);
 
 	XIP_INVAL_CACHED_RANGE(map, adr, len);
 	ENABLE_VPP(map);
 	xip_disable(map, chip, adr);
 
+ retry:
 	cfi_send_gen_cmd(0xAA, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL);
 	cfi_send_gen_cmd(0x55, cfi->addr_unlock2, chip->start, map, cfi, cfi->device_type, NULL);
 	cfi_send_gen_cmd(0x80, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL);
@@ -2389,7 +2396,7 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip,
 		if (time_after(jiffies, timeo)) {
 			xip_enable(map, chip, adr);
 			printk(KERN_WARNING "MTD %s(): software timeout\n",
-				__func__ );
+				__func__);
 			break;
 		}
 
@@ -2399,9 +2406,12 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip,
 	/* Did we succeed? */
 	if (!chip_good(map, adr, map_word_ff(map))) {
 		/* reset on all failures. */
-		map_write( map, CMD(0xF0), chip->start );
+		map_write(map, CMD(0xF0), chip->start);
 		/* FIXME - should have reset delay before continuing */
 
+		if (++retry_cnt <= MAX_WRITE_RETRIES)
+			goto retry;
+
 		ret = -EIO;
 	}
 
-- 
2.16.1

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH] mtd: cfi_cmdset_0002: Change erase functions to retry for error
  2018-05-08 10:20 [PATCH] mtd: cfi_cmdset_0002: Change erase functions to retry for error IKEGAMI Tokunori
@ 2018-05-08 11:20 ` Boris Brezillon
  2018-05-08 14:15   ` IKEGAMI Tokunori
  2018-05-08 16:52   ` IKEGAMI Tokunori
  0 siblings, 2 replies; 13+ messages in thread
From: Boris Brezillon @ 2018-05-08 11:20 UTC (permalink / raw)
  To: IKEGAMI Tokunori
  Cc: Brian Norris, Boris Brezillon, Richard Weinberger, Marek Vasut,
	PACKHAM Chris, linux-mtd, Cyrille Pitchen, David Woodhouse

Hello,

On Tue, 8 May 2018 10:20:29 +0000
IKEGAMI Tokunori <ikegami@allied-telesis.co.jp> wrote:

> From: Tokunori Ikegami <ikegami@allied-telesis.co.jp>
> 
> For the word write functions it is retried for error.
> But it is not implemented to retry for the erase functions.
> To make sure for the erase functions change to retry as same.

Can you give a bit more context, like why this is needed, where did
you see the problem (which chip/controller), ...

> 
> Signed-off-by: Tokunori Ikegami <ikegami@allied-telesis.co.jp>
> Cc: Chris Packham <chris.packham@alliedtelesis.co.nz>
> Cc: David Woodhouse <dwmw2@infradead.org>
> Cc: Boris Brezillon <boris.brezillon@free-electrons.com>
> Cc: Marek Vasut <marek.vasut@gmail.com>
> Cc: Richard Weinberger <richard@nod.at>
> Cc: Cyrille Pitchen <cyrille.pitchen@wedev4u.fr>
> Cc: linux-mtd@lists.infradead.org
> ---
>  drivers/mtd/chips/cfi_cmdset_0002.c | 36 +++++++++++++++++++++++-------------
>  1 file changed, 23 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c
> index 56aa6b75213d..e2dc020cd102 100644
> --- a/drivers/mtd/chips/cfi_cmdset_0002.c
> +++ b/drivers/mtd/chips/cfi_cmdset_0002.c
> @@ -42,7 +42,7 @@
>  #define AMD_BOOTLOC_BUG
>  #define FORCE_WORD_WRITE 0
>  
> -#define MAX_WORD_RETRIES 3
> +#define MAX_WRITE_RETRIES 3

Not sure why you rename this macro, but if it's needed can you do that
in a separate patch?

>  
>  #define SST49LF004B	        0x0060
>  #define SST49LF040B	        0x0050
> @@ -207,7 +207,7 @@ static void fixup_use_write_buffers(struct mtd_info *mtd)
>  	struct map_info *map = mtd->priv;
>  	struct cfi_private *cfi = map->fldrv_priv;
>  	if (cfi->cfiq->BufWriteTimeoutTyp) {
> -		pr_debug("Using buffer write method\n" );
> +		pr_debug("Using buffer write method\n");

Same for coding style issues, they should be fixed in a separate commit.

>  		mtd->_write = cfi_amdstd_write_buffers;
>  	}
>  }
> @@ -1577,7 +1577,7 @@ static int __xipram do_write_oneword(struct map_info *map, struct flchip *chip,
>  	}
>  
>  	pr_debug("MTD %s(): WRITE 0x%.8lx(0x%.8lx)\n",
> -	       __func__, adr, datum.x[0] );
> +	       __func__, adr, datum.x[0]);
>  
>  	if (mode == FL_OTP_WRITE)
>  		otp_enter(map, chip, adr, map_bankwidth(map));
> @@ -1643,10 +1643,10 @@ static int __xipram do_write_oneword(struct map_info *map, struct flchip *chip,
>  	/* Did we succeed? */
>  	if (!chip_good(map, adr, datum)) {
>  		/* reset on all failures. */
> -		map_write( map, CMD(0xF0), chip->start );
> +		map_write(map, CMD(0xF0), chip->start);
>  		/* FIXME - should have reset delay before continuing */
>  
> -		if (++retry_cnt <= MAX_WORD_RETRIES)
> +		if (++retry_cnt <= MAX_WRITE_RETRIES)
>  			goto retry;
>  
>  		ret = -EIO;
> @@ -1821,7 +1821,7 @@ static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip,
>  	datum = map_word_load(map, buf);
>  
>  	pr_debug("MTD %s(): WRITE 0x%.8lx(0x%.8lx)\n",
> -	       __func__, adr, datum.x[0] );
> +	       __func__, adr, datum.x[0]);
>  
>  	XIP_INVAL_CACHED_RANGE(map, adr, len);
>  	ENABLE_VPP(map);
> @@ -2105,7 +2105,7 @@ static int do_panic_write_oneword(struct map_info *map, struct flchip *chip,
>  		map_write(map, CMD(0xF0), chip->start);
>  		/* FIXME - should have reset delay before continuing */
>  
> -		if (++retry_cnt <= MAX_WORD_RETRIES)
> +		if (++retry_cnt <= MAX_WRITE_RETRIES)
>  			goto retry;
>  
>  		ret = -EIO;
> @@ -2240,6 +2240,7 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip)
>  	unsigned long int adr;
>  	DECLARE_WAITQUEUE(wait, current);
>  	int ret = 0;
> +	int retry_cnt = 0;
>  
>  	adr = cfi->addr_unlock1;
>  
> @@ -2251,12 +2252,13 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip)
>  	}
>  
>  	pr_debug("MTD %s(): ERASE 0x%.8lx\n",
> -	       __func__, chip->start );
> +	       __func__, chip->start);
>  
>  	XIP_INVAL_CACHED_RANGE(map, adr, map->size);
>  	ENABLE_VPP(map);
>  	xip_disable(map, chip, adr);
>  
> + retry:
>  	cfi_send_gen_cmd(0xAA, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL);
>  	cfi_send_gen_cmd(0x55, cfi->addr_unlock2, chip->start, map, cfi, cfi->device_type, NULL);
>  	cfi_send_gen_cmd(0x80, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL);
> @@ -2297,7 +2299,7 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip)
>  
>  		if (time_after(jiffies, timeo)) {
>  			printk(KERN_WARNING "MTD %s(): software timeout\n",
> -				__func__ );
> +				__func__);
>  			break;
>  		}
>  
> @@ -2307,9 +2309,12 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip)
>  	/* Did we succeed? */
>  	if (!chip_good(map, adr, map_word_ff(map))) {
>  		/* reset on all failures. */
> -		map_write( map, CMD(0xF0), chip->start );
> +		map_write(map, CMD(0xF0), chip->start);
>  		/* FIXME - should have reset delay before continuing */
>  
> +		if (++retry_cnt <= MAX_WRITE_RETRIES)
> +			goto retry;

Shouldn't we have a MAX_ERASE_RETRY for that? Also, what makes you
think the following erase operations will work?

Thanks,

Boris

^ permalink raw reply	[flat|nested] 13+ messages in thread

* RE: [PATCH] mtd: cfi_cmdset_0002: Change erase functions to retry for error
  2018-05-08 11:20 ` Boris Brezillon
@ 2018-05-08 14:15   ` IKEGAMI Tokunori
  2018-05-08 16:52   ` IKEGAMI Tokunori
  1 sibling, 0 replies; 13+ messages in thread
From: IKEGAMI Tokunori @ 2018-05-08 14:15 UTC (permalink / raw)
  To: Boris Brezillon
  Cc: Brian Norris, Boris Brezillon, Richard Weinberger, Marek Vasut,
	PACKHAM Chris, linux-mtd, Cyrille Pitchen, David Woodhouse

Hi Boris-san,

Thank you so much for your review comments.

> > For the word write functions it is retried for error.
> > But it is not implemented to retry for the erase functions.
> > To make sure for the erase functions change to retry as same.
> 
> Can you give a bit more context, like why this is needed, where did
> you see the problem (which chip/controller), ...

  Okay I will do update the commit message to add more context.

> > -#define MAX_WORD_RETRIES 3
> > +#define MAX_WRITE_RETRIES 3
> 
> Not sure why you rename this macro, but if it's needed can you do that
> in a separate patch?

  The reason is just to share the definition for both the word write and erase functions.
  Okay I will consider to do this in a separate patch.

> > -		pr_debug("Using buffer write method\n" );
> > +		pr_debug("Using buffer write method\n");
> 
> Same for coding style issues, they should be fixed in a separate commit.

  Okay I will do that.

> > +		if (++retry_cnt <= MAX_WRITE_RETRIES)
> > +			goto retry;
> 
> Shouldn't we have a MAX_ERASE_RETRY for that?

  It is okay also and I also thought this at first.
  But the similar definitions are defined so I thought that it is better to be shared as MAX_WRITE_RETRIES.

> Also, what makes you think the following erase operations will work?

  I think that it is possible to be recovered by the retry and it is same for the word write operations.

Regards,
Ikegami

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH] mtd: cfi_cmdset_0002: Change erase functions to retry for error
  2018-05-08 11:20 ` Boris Brezillon
  2018-05-08 14:15   ` IKEGAMI Tokunori
@ 2018-05-08 16:52   ` IKEGAMI Tokunori
  2018-05-08 17:11     ` Boris Brezillon
  1 sibling, 1 reply; 13+ messages in thread
From: IKEGAMI Tokunori @ 2018-05-08 16:52 UTC (permalink / raw)
  To: Boris Brezillon
  Cc: Brian Norris, Boris Brezillon, Richard Weinberger, Marek Vasut,
	PACKHAM Chris, linux-mtd, Cyrille Pitchen, David Woodhouse

From: Tokunori Ikegami <ikegami@allied-telesis.co.jp>

For the word write functions it is retried for error.
But it is not implemented to retry for the erase functions.
To make sure for the erase functions change to retry as same.

This is needed to prevent the flash erase error caused only once.
It was caused by the error case of chip_good() in the do_erase_oneblock().
Also it was confirmed on the MACRONIX flash device MX29GL512FHT2I-11G.
But the error issue behavior is not able to reproduce at this moment.
The flash controller is parallel Flash interface integrated on BCM53003.

Also the change is possible to resolve other erase error.
For example the Erase suspend issue was caused on Cypress AMD NOR flash.
  S29GL01GS / S29GL512S / S29GL256S / S29GL128S

Signed-off-by: Tokunori Ikegami <ikegami@allied-telesis.co.jp>
Cc: Chris Packham <chris.packham@alliedtelesis.co.nz>
Cc: Brian Norris <computersforpeace@gmail.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Boris Brezillon <boris.brezillon@free-electrons.com>
Cc: Marek Vasut <marek.vasut@gmail.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Cyrille Pitchen <cyrille.pitchen@wedev4u.fr>
Cc: linux-mtd@lists.infradead.org
---
 drivers/mtd/chips/cfi_cmdset_0002.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c
index 56aa6b75213d..f522a856526a 100644
--- a/drivers/mtd/chips/cfi_cmdset_0002.c
+++ b/drivers/mtd/chips/cfi_cmdset_0002.c
@@ -43,6 +43,7 @@
 #define FORCE_WORD_WRITE 0
 
 #define MAX_WORD_RETRIES 3
+#define MAX_ERASE_RETRY 3
 
 #define SST49LF004B	        0x0060
 #define SST49LF040B	        0x0050
@@ -2240,6 +2241,7 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip)
 	unsigned long int adr;
 	DECLARE_WAITQUEUE(wait, current);
 	int ret = 0;
+	int retry_cnt = 0;
 
 	adr = cfi->addr_unlock1;
 
@@ -2257,6 +2259,7 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip)
 	ENABLE_VPP(map);
 	xip_disable(map, chip, adr);
 
+ retry:
 	cfi_send_gen_cmd(0xAA, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL);
 	cfi_send_gen_cmd(0x55, cfi->addr_unlock2, chip->start, map, cfi, cfi->device_type, NULL);
 	cfi_send_gen_cmd(0x80, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL);
@@ -2310,6 +2313,9 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip)
 		map_write( map, CMD(0xF0), chip->start );
 		/* FIXME - should have reset delay before continuing */
 
+		if (++retry_cnt <= MAX_ERASE_RETRY)
+			goto retry;
+
 		ret = -EIO;
 	}
 
@@ -2329,6 +2335,7 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip,
 	unsigned long timeo = jiffies + HZ;
 	DECLARE_WAITQUEUE(wait, current);
 	int ret = 0;
+	int retry_cnt = 0;
 
 	adr += chip->start;
 
@@ -2346,6 +2353,7 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip,
 	ENABLE_VPP(map);
 	xip_disable(map, chip, adr);
 
+ retry:
 	cfi_send_gen_cmd(0xAA, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL);
 	cfi_send_gen_cmd(0x55, cfi->addr_unlock2, chip->start, map, cfi, cfi->device_type, NULL);
 	cfi_send_gen_cmd(0x80, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL);
@@ -2402,6 +2410,9 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip,
 		map_write( map, CMD(0xF0), chip->start );
 		/* FIXME - should have reset delay before continuing */
 
+		if (++retry_cnt <= MAX_ERASE_RETRY)
+			goto retry;
+
 		ret = -EIO;
 	}
 
-- 
2.16.1

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH] mtd: cfi_cmdset_0002: Change erase functions to retry for error
  2018-05-08 16:52   ` IKEGAMI Tokunori
@ 2018-05-08 17:11     ` Boris Brezillon
  2018-05-08 17:35       ` IKEGAMI Tokunori
  2018-05-09  8:18       ` Joakim Tjernlund
  0 siblings, 2 replies; 13+ messages in thread
From: Boris Brezillon @ 2018-05-08 17:11 UTC (permalink / raw)
  To: IKEGAMI Tokunori
  Cc: Brian Norris, Boris Brezillon, Richard Weinberger, Marek Vasut,
	PACKHAM Chris, linux-mtd, Cyrille Pitchen, David Woodhouse,
	Joakim Tjernlund

+Joakim

On Tue, 8 May 2018 16:52:55 +0000
IKEGAMI Tokunori <ikegami@allied-telesis.co.jp> wrote:

> From: Tokunori Ikegami <ikegami@allied-telesis.co.jp>
> 
> For the word write functions it is retried for error.
> But it is not implemented to retry for the erase functions.
> To make sure for the erase functions change to retry as same.
> 
> This is needed to prevent the flash erase error caused only once.
> It was caused by the error case of chip_good() in the do_erase_oneblock().
> Also it was confirmed on the MACRONIX flash device MX29GL512FHT2I-11G.
> But the error issue behavior is not able to reproduce at this moment.

Hm, that's a problem. How can you be sure the retry approach fixes the
issue if you can't reproduce the it?

> The flash controller is parallel Flash interface integrated on BCM53003.
> 
> Also the change is possible to resolve other erase error.
> For example the Erase suspend issue was caused on Cypress AMD NOR flash.
>   S29GL01GS / S29GL512S / S29GL256S / S29GL128S

Is the Erase suspend issue related to this problem?

> 
> Signed-off-by: Tokunori Ikegami <ikegami@allied-telesis.co.jp>
> Cc: Chris Packham <chris.packham@alliedtelesis.co.nz>
> Cc: Brian Norris <computersforpeace@gmail.com>
> Cc: David Woodhouse <dwmw2@infradead.org>
> Cc: Boris Brezillon <boris.brezillon@free-electrons.com>
> Cc: Marek Vasut <marek.vasut@gmail.com>
> Cc: Richard Weinberger <richard@nod.at>
> Cc: Cyrille Pitchen <cyrille.pitchen@wedev4u.fr>
> Cc: linux-mtd@lists.infradead.org
> ---
>  drivers/mtd/chips/cfi_cmdset_0002.c | 11 +++++++++++
>  1 file changed, 11 insertions(+)
> 
> diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c
> index 56aa6b75213d..f522a856526a 100644
> --- a/drivers/mtd/chips/cfi_cmdset_0002.c
> +++ b/drivers/mtd/chips/cfi_cmdset_0002.c
> @@ -43,6 +43,7 @@
>  #define FORCE_WORD_WRITE 0
>  
>  #define MAX_WORD_RETRIES 3
> +#define MAX_ERASE_RETRY 3
>  
>  #define SST49LF004B	        0x0060
>  #define SST49LF040B	        0x0050
> @@ -2240,6 +2241,7 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip)
>  	unsigned long int adr;
>  	DECLARE_WAITQUEUE(wait, current);
>  	int ret = 0;
> +	int retry_cnt = 0;
>  
>  	adr = cfi->addr_unlock1;
>  
> @@ -2257,6 +2259,7 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip)
>  	ENABLE_VPP(map);
>  	xip_disable(map, chip, adr);
>  
> + retry:
>  	cfi_send_gen_cmd(0xAA, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL);
>  	cfi_send_gen_cmd(0x55, cfi->addr_unlock2, chip->start, map, cfi, cfi->device_type, NULL);
>  	cfi_send_gen_cmd(0x80, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL);
> @@ -2310,6 +2313,9 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip)
>  		map_write( map, CMD(0xF0), chip->start );
>  		/* FIXME - should have reset delay before continuing */
>  
> +		if (++retry_cnt <= MAX_ERASE_RETRY)
> +			goto retry;
> +
>  		ret = -EIO;
>  	}
>  
> @@ -2329,6 +2335,7 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip,
>  	unsigned long timeo = jiffies + HZ;
>  	DECLARE_WAITQUEUE(wait, current);
>  	int ret = 0;
> +	int retry_cnt = 0;
>  
>  	adr += chip->start;
>  
> @@ -2346,6 +2353,7 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip,
>  	ENABLE_VPP(map);
>  	xip_disable(map, chip, adr);
>  
> + retry:
>  	cfi_send_gen_cmd(0xAA, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL);
>  	cfi_send_gen_cmd(0x55, cfi->addr_unlock2, chip->start, map, cfi, cfi->device_type, NULL);
>  	cfi_send_gen_cmd(0x80, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL);
> @@ -2402,6 +2410,9 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip,
>  		map_write( map, CMD(0xF0), chip->start );
>  		/* FIXME - should have reset delay before continuing */
>  
> +		if (++retry_cnt <= MAX_ERASE_RETRY)
> +			goto retry;
> +
>  		ret = -EIO;
>  	}
>  

^ permalink raw reply	[flat|nested] 13+ messages in thread

* RE: [PATCH] mtd: cfi_cmdset_0002: Change erase functions to retry for error
  2018-05-08 17:11     ` Boris Brezillon
@ 2018-05-08 17:35       ` IKEGAMI Tokunori
  2018-05-08 18:02         ` Boris Brezillon
  2018-05-09  8:18       ` Joakim Tjernlund
  1 sibling, 1 reply; 13+ messages in thread
From: IKEGAMI Tokunori @ 2018-05-08 17:35 UTC (permalink / raw)
  To: Boris Brezillon
  Cc: Brian Norris, Boris Brezillon, Richard Weinberger, Marek Vasut,
	PACKHAM Chris, linux-mtd, Cyrille Pitchen, David Woodhouse,
	Joakim Tjernlund

Hi Boris-san,

Thanks for your reviewing.

> > This is needed to prevent the flash erase error caused only once.
> > It was caused by the error case of chip_good() in the do_erase_oneblock().
> > Also it was confirmed on the MACRONIX flash device MX29GL512FHT2I-11G.
> > But the error issue behavior is not able to reproduce at this moment.
> 
> Hm, that's a problem. How can you be sure the retry approach fixes the
> issue if you can't reproduce the it?

  I can reproduce the flash write error by the same reproduction method.
  And the error can be resolved by the same approach fixes as below.
    <http://lists.infradead.org/pipermail/linux-mtd/2018-May/080794.html>
  So it seems that the erase error also can be resolved by the changes.

> > The flash controller is parallel Flash interface integrated on BCM53003.
> >
> > Also the change is possible to resolve other erase error.
> > For example the Erase suspend issue was caused on Cypress AMD NOR flash.
> >   S29GL01GS / S29GL512S / S29GL256S / S29GL128S
> 
> Is the Erase suspend issue related to this problem?

  I am not sure about the issue behavior detail since the issue is not caused on my environment.
  The issue was mentioned by Jocke-san on the following mail.
    <http://lists.infradead.org/pipermail/linux-mtd/2018-April/080518.html>
  Also I am asking to him to try the original patch on this thread.

Regards,
Ikegami

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] mtd: cfi_cmdset_0002: Change erase functions to retry for error
  2018-05-08 17:35       ` IKEGAMI Tokunori
@ 2018-05-08 18:02         ` Boris Brezillon
  2018-05-09  0:27           ` IKEGAMI Tokunori
  0 siblings, 1 reply; 13+ messages in thread
From: Boris Brezillon @ 2018-05-08 18:02 UTC (permalink / raw)
  To: IKEGAMI Tokunori
  Cc: Brian Norris, Boris Brezillon, Richard Weinberger, Marek Vasut,
	PACKHAM Chris, linux-mtd, Cyrille Pitchen, David Woodhouse,
	Joakim Tjernlund

Hi,

On Tue, 8 May 2018 17:35:48 +0000
IKEGAMI Tokunori <ikegami@allied-telesis.co.jp> wrote:

> Hi Boris-san,
> 
> Thanks for your reviewing.
> 
> > > This is needed to prevent the flash erase error caused only once.
> > > It was caused by the error case of chip_good() in the do_erase_oneblock().
> > > Also it was confirmed on the MACRONIX flash device MX29GL512FHT2I-11G.
> > > But the error issue behavior is not able to reproduce at this moment.  
> > 
> > Hm, that's a problem. How can you be sure the retry approach fixes the
> > issue if you can't reproduce the it?  
> 
>   I can reproduce the flash write error by the same reproduction method.
>   And the error can be resolved by the same approach fixes as below.
>     <http://lists.infradead.org/pipermail/linux-mtd/2018-May/080794.html>

Hm, even the bug fix for the write path looks suspicious. BTW, you're
only checking the last written word, so how can you be sure all other
words have been written correctly? I don't know a lot about CFI chips,
but are you sure this is not a timing issue (timings mis-configured on
the controller side)?

>   So it seems that the erase error also can be resolved by the changes.

Well, I'd be more comfortable if someone else could confirm the bug
(ideally tested with a different controller) and validate the fix.

> 
> > > The flash controller is parallel Flash interface integrated on BCM53003.
> > >
> > > Also the change is possible to resolve other erase error.
> > > For example the Erase suspend issue was caused on Cypress AMD NOR flash.
> > >   S29GL01GS / S29GL512S / S29GL256S / S29GL128S  
> > 
> > Is the Erase suspend issue related to this problem?  
> 
>   I am not sure about the issue behavior detail since the issue is not caused on my environment.
>   The issue was mentioned by Jocke-san on the following mail.
>     <http://lists.infradead.org/pipermail/linux-mtd/2018-April/080518.html>
>   Also I am asking to him to try the original patch on this thread.

Ok, let's wait for Joakim's feedback then.

Thanks,

Boris

^ permalink raw reply	[flat|nested] 13+ messages in thread

* RE: [PATCH] mtd: cfi_cmdset_0002: Change erase functions to retry for error
  2018-05-08 18:02         ` Boris Brezillon
@ 2018-05-09  0:27           ` IKEGAMI Tokunori
  0 siblings, 0 replies; 13+ messages in thread
From: IKEGAMI Tokunori @ 2018-05-09  0:27 UTC (permalink / raw)
  To: Boris Brezillon
  Cc: Brian Norris, Boris Brezillon, Richard Weinberger, Marek Vasut,
	PACKHAM Chris, linux-mtd, Cyrille Pitchen, David Woodhouse,
	Joakim Tjernlund

Hi Boris-san,

Thanks for your comments.

> >   I can reproduce the flash write error by the same reproduction method.
> >   And the error can be resolved by the same approach fixes as below.
> >
> <http://lists.infradead.org/pipermail/linux-mtd/2018-May/080794.html>
> 
> Hm, even the bug fix for the write path looks suspicious. BTW, you're
> only checking the last written word, so how can you be sure all other
> words have been written correctly?

  For the write buffer I think that it is enough to check only the last word.
  Since it is described by the data sheets to check the operation status.

> I don't know a lot about CFI chips,
> but are you sure this is not a timing issue (timings mis-configured on
> the controller side)?

  Yes I think that this is not a timing issue by mis-configuration.
  But it is still possible to be a timing issue but not mis-configured.
  The write and erase errors were caused by enabling CPU external sync mode.
  For the CPU erratum workaround the sync mode is required to be enabled.

> >   Also I am asking to him to try the original patch on this thread.
> 
> Ok, let's wait for Joakim's feedback then.

  Noted this.

Regards,
Ikegami

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] mtd: cfi_cmdset_0002: Change erase functions to retry for error
  2018-05-08 17:11     ` Boris Brezillon
  2018-05-08 17:35       ` IKEGAMI Tokunori
@ 2018-05-09  8:18       ` Joakim Tjernlund
  2018-05-09 10:20         ` IKEGAMI Tokunori
  1 sibling, 1 reply; 13+ messages in thread
From: Joakim Tjernlund @ 2018-05-09  8:18 UTC (permalink / raw)
  To: boris.brezillon, ikegami
  Cc: linux-mtd, chris.packham, cyrille.pitchen, dwmw2,
	computersforpeace, boris.brezillon, marek.vasut, richard

On Tue, 2018-05-08 at 19:11 +0200, Boris Brezillon wrote:
> CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you recognize the sender and know the content is safe.
> 
> 
> +Joakim
> 
> On Tue, 8 May 2018 16:52:55 +0000
> IKEGAMI Tokunori <ikegami@allied-telesis.co.jp> wrote:
> 
> > From: Tokunori Ikegami <ikegami@allied-telesis.co.jp>
> > 
> > For the word write functions it is retried for error.
> > But it is not implemented to retry for the erase functions.
> > To make sure for the erase functions change to retry as same.
> > 
> > This is needed to prevent the flash erase error caused only once.
> > It was caused by the error case of chip_good() in the do_erase_oneblock().
> > Also it was confirmed on the MACRONIX flash device MX29GL512FHT2I-11G.
> > But the error issue behavior is not able to reproduce at this moment.
> 
> Hm, that's a problem. How can you be sure the retry approach fixes the
> issue if you can't reproduce the it?
> 
> > The flash controller is parallel Flash interface integrated on BCM53003.
> > 
> > Also the change is possible to resolve other erase error.
> > For example the Erase suspend issue was caused on Cypress AMD NOR flash.
> >   S29GL01GS / S29GL512S / S29GL256S / S29GL128S
> 
> Is the Erase suspend issue related to this problem?
> 

My Erase suspend issue is not related to just retrying the erase.
We have seen several times that too many Erase suspend for one block will cause 5.5.2.6 DQ5:
 Exceeded Timing Limits
This condition is not checked for, nor handled in any way.
Once this condition has occurred, a simple retry wont fix it. One must
first issue a flash reset:
  map_write( map, CMD(0xF0), chip->in_progress_block_addr); /* Reset */
which will clear this error condition and return the flash to read array more.
Now you can restart the erase again and it will probably finish the erase
this time(at least in our testing).

Testing for DQ5: Exceeded Timing Limits is not straight forward in current 0002 driver
since it requires more than just testing for toggling bits. I have not
made any progress on this due to other more pressing work.

Note: In our testing we needed over 300000 suspend/resume before hitting the DQ5
limit.
Here is my test hack so far:
diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c
index 23a893ab4264..38b52e2c6fb6 100644
--- a/drivers/mtd/chips/cfi_cmdset_0002.c
+++ b/drivers/mtd/chips/cfi_cmdset_0002.c
@@ -829,6 +829,7 @@ static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr
                chip->oldstate = FL_ERASING;
                chip->state = FL_ERASE_SUSPENDING;
                chip->erase_suspended = 1;
+               chip->in_progress_suspends++;
                for (;;) {
                        if (chip_ready(map, adr))
                                break;
@@ -839,8 +840,33 @@ static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr
                                 * there was an error (so leave the erase
                                 * routine to recover from it) or we trying to
                                 * use the erase-in-progress sector. */
+                               map_word status = map_read(map, adr);
+
+                               map_write( map, CMD(0xF0), chip->in_progress_block_addr); /* Reset */
+#if 1
+                               /* Restart erase */
+                               cfi_send_gen_cmd(0xAA, cfi->addr_unlock1, chip->start, map, cfi,
+                                                cfi->device_type, NULL);
+                               cfi_send_gen_cmd(0x55, cfi->addr_unlock2, chip->start, map, cfi,
+                                                cfi->device_type, NULL);
+                               cfi_send_gen_cmd(0x80, cfi->addr_unlock1, chip->start, map, cfi,
+                                                cfi->device_type, NULL);
+                               cfi_send_gen_cmd(0xAA, cfi->addr_unlock1, chip->start, map, cfi,
+                                                cfi->device_type, NULL);
+                               cfi_send_gen_cmd(0x55, cfi->addr_unlock2, chip->start, map, cfi,
+                                                cfi->device_type, NULL);
+                               map_write(map, cfi->sector_erase_cmd, chip->in_progress_block_addr);
+#endif
+//                             chip->oldstate = FL_READY;
+//                             chip->state = FL_READY;
+
                                put_chip(map, chip, adr);
-                               printk(KERN_ERR "MTD %s(): chip not ready after erase suspend\n", __func__);
+                               printk(KERN_ERR "MTD %s(): chip not ready after erase suspend, block_addr:0x%lx, "
+                                      "block_mask:0x%lx, adr:0x%lx, suspends:%ld, status:0x%lx \n", __func__,
+                                      chip->in_progress_block_addr, chip->in_progress_block_addr, adr,
+                                      chip->in_progress_suspends,
+                                      status.x[0]);
+
                                return -EIO;
                        }
 
@@ -2270,7 +2296,7 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip)
        chip->erase_suspended = 0;
        chip->in_progress_block_addr = adr;
        chip->in_progress_block_mask = ~(map->size - 1);
-
+       chip->in_progress_suspends = 0;
        INVALIDATE_CACHE_UDELAY(map, chip,
                                adr, map->size,
                                chip->erase_time*500);
diff --git a/include/linux/mtd/flashchip.h b/include/linux/mtd/flashchip.h
index 3529683f691e..8e7ba8244ced 100644
--- a/include/linux/mtd/flashchip.h
+++ b/include/linux/mtd/flashchip.h
@@ -86,6 +86,7 @@ struct flchip {
        unsigned int erase_suspended:1;
        unsigned long in_progress_block_addr;
        unsigned long in_progress_block_mask;
+       unsigned long in_progress_suspends;
 
        struct mutex mutex;
        wait_queue_head_t wq; /* Wait on here when we're waiting for the chip


Which printed(when error occurred):
MTD get_chip(): chip not ready after erase suspend, block_addr:0x3480000, block_mask:0x3480000, adr:0x3a7da84,
suspends:319941, status:0x28


 Jocke

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* RE: [PATCH] mtd: cfi_cmdset_0002: Change erase functions to retry for error
  2018-05-09  8:18       ` Joakim Tjernlund
@ 2018-05-09 10:20         ` IKEGAMI Tokunori
  2018-05-09 10:26           ` IKEGAMI Tokunori
  0 siblings, 1 reply; 13+ messages in thread
From: IKEGAMI Tokunori @ 2018-05-09 10:20 UTC (permalink / raw)
  To: Joakim Tjernlund, boris.brezillon
  Cc: linux-mtd, PACKHAM Chris, cyrille.pitchen, dwmw2,
	computersforpeace, boris.brezillon, marek.vasut, richard

Hi Jocke-san,

  Thanks for the mail.
  I have just understood about the Erase suspend issue is not related to the change.
  Also it is not able to be resolved by the chagne.

Hi Boris-san,

  So the below commit message is not correct so I will update it to remove to describe the latter part about the Erase suspend issue.

> > > Also the change is possible to resolve other erase error.
> > > For example the Erase suspend issue was caused on Cypress AMD NOR flash.
> > >   S29GL01GS / S29GL512S / S29GL256S / S29GL128S 

Regards,
Ikegami

> -----Original Message-----
> From: Joakim Tjernlund [mailto:Joakim.Tjernlund@infinera.com]
> Sent: Wednesday, May 09, 2018 5:19 PM
> To: boris.brezillon@bootlin.com; IKEGAMI Tokunori
> Cc: linux-mtd@lists.infradead.org; PACKHAM Chris;
> cyrille.pitchen@wedev4u.fr; dwmw2@infradead.org;
> computersforpeace@gmail.com; boris.brezillon@free-electrons.com;
> marek.vasut@gmail.com; richard@nod.at
> Subject: Re: [PATCH] mtd: cfi_cmdset_0002: Change erase functions to retry
> for error
> 
> On Tue, 2018-05-08 at 19:11 +0200, Boris Brezillon wrote:
> > CAUTION: This email originated from outside of the organization. Do not
> click links or open attachments unless you recognize the sender and know
> the content is safe.
> >
> >
> > +Joakim
> >
> > On Tue, 8 May 2018 16:52:55 +0000
> > IKEGAMI Tokunori <ikegami@allied-telesis.co.jp> wrote:
> >
> > > From: Tokunori Ikegami <ikegami@allied-telesis.co.jp>
> > >
> > > For the word write functions it is retried for error.
> > > But it is not implemented to retry for the erase functions.
> > > To make sure for the erase functions change to retry as same.
> > >
> > > This is needed to prevent the flash erase error caused only once.
> > > It was caused by the error case of chip_good() in the
> do_erase_oneblock().
> > > Also it was confirmed on the MACRONIX flash device MX29GL512FHT2I-11G.
> > > But the error issue behavior is not able to reproduce at this moment.
> >
> > Hm, that's a problem. How can you be sure the retry approach fixes the
> > issue if you can't reproduce the it?
> >
> > > The flash controller is parallel Flash interface integrated on BCM53003.
> > >
> > > Also the change is possible to resolve other erase error.
> > > For example the Erase suspend issue was caused on Cypress AMD NOR flash.
> > >   S29GL01GS / S29GL512S / S29GL256S / S29GL128S
> >
> > Is the Erase suspend issue related to this problem?
> >
> 
> My Erase suspend issue is not related to just retrying the erase.
> We have seen several times that too many Erase suspend for one block will
> cause 5.5.2.6 DQ5:
>  Exceeded Timing Limits
> This condition is not checked for, nor handled in any way.
> Once this condition has occurred, a simple retry wont fix it. One must
> first issue a flash reset:
>   map_write( map, CMD(0xF0), chip->in_progress_block_addr); /* Reset */
> which will clear this error condition and return the flash to read array
> more.
> Now you can restart the erase again and it will probably finish the erase
> this time(at least in our testing).
> 
> Testing for DQ5: Exceeded Timing Limits is not straight forward in current
> 0002 driver
> since it requires more than just testing for toggling bits. I have not
> made any progress on this due to other more pressing work.
> 
> Note: In our testing we needed over 300000 suspend/resume before hitting
> the DQ5
> limit.
> Here is my test hack so far:
> diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c
> b/drivers/mtd/chips/cfi_cmdset_0002.c
> index 23a893ab4264..38b52e2c6fb6 100644
> --- a/drivers/mtd/chips/cfi_cmdset_0002.c
> +++ b/drivers/mtd/chips/cfi_cmdset_0002.c
> @@ -829,6 +829,7 @@ static int get_chip(struct map_info *map, struct flchip
> *chip, unsigned long adr
>                 chip->oldstate = FL_ERASING;
>                 chip->state = FL_ERASE_SUSPENDING;
>                 chip->erase_suspended = 1;
> +               chip->in_progress_suspends++;
>                 for (;;) {
>                         if (chip_ready(map, adr))
>                                 break;
> @@ -839,8 +840,33 @@ static int get_chip(struct map_info *map, struct flchip
> *chip, unsigned long adr
>                                  * there was an error (so leave the erase
>                                  * routine to recover from it) or we trying
> to
>                                  * use the erase-in-progress sector. */
> +                               map_word status = map_read(map, adr);
> +
> +                               map_write( map, CMD(0xF0),
> chip->in_progress_block_addr); /* Reset */
> +#if 1
> +                               /* Restart erase */
> +                               cfi_send_gen_cmd(0xAA,
> cfi->addr_unlock1, chip->start, map, cfi,
> +                                                cfi->device_type,
> NULL);
> +                               cfi_send_gen_cmd(0x55,
> cfi->addr_unlock2, chip->start, map, cfi,
> +                                                cfi->device_type,
> NULL);
> +                               cfi_send_gen_cmd(0x80,
> cfi->addr_unlock1, chip->start, map, cfi,
> +                                                cfi->device_type,
> NULL);
> +                               cfi_send_gen_cmd(0xAA,
> cfi->addr_unlock1, chip->start, map, cfi,
> +                                                cfi->device_type,
> NULL);
> +                               cfi_send_gen_cmd(0x55,
> cfi->addr_unlock2, chip->start, map, cfi,
> +                                                cfi->device_type,
> NULL);
> +                               map_write(map, cfi->sector_erase_cmd,
> chip->in_progress_block_addr);
> +#endif
> +//                             chip->oldstate = FL_READY;
> +//                             chip->state = FL_READY;
> +
>                                 put_chip(map, chip, adr);
> -                               printk(KERN_ERR "MTD %s(): chip not ready
> after erase suspend\n", __func__);
> +                               printk(KERN_ERR "MTD %s(): chip not ready
> after erase suspend, block_addr:0x%lx, "
> +                                      "block_mask:0x%lx, adr:0x%lx,
> suspends:%ld, status:0x%lx \n", __func__,
> +                                      chip->in_progress_block_addr,
> chip->in_progress_block_addr, adr,
> +                                      chip->in_progress_suspends,
> +                                      status.x[0]);
> +
>                                 return -EIO;
>                         }
> 
> @@ -2270,7 +2296,7 @@ static int __xipram do_erase_chip(struct map_info
> *map, struct flchip *chip)
>         chip->erase_suspended = 0;
>         chip->in_progress_block_addr = adr;
>         chip->in_progress_block_mask = ~(map->size - 1);
> -
> +       chip->in_progress_suspends = 0;
>         INVALIDATE_CACHE_UDELAY(map, chip,
>                                 adr, map->size,
>                                 chip->erase_time*500);
> diff --git a/include/linux/mtd/flashchip.h
> b/include/linux/mtd/flashchip.h
> index 3529683f691e..8e7ba8244ced 100644
> --- a/include/linux/mtd/flashchip.h
> +++ b/include/linux/mtd/flashchip.h
> @@ -86,6 +86,7 @@ struct flchip {
>         unsigned int erase_suspended:1;
>         unsigned long in_progress_block_addr;
>         unsigned long in_progress_block_mask;
> +       unsigned long in_progress_suspends;
> 
>         struct mutex mutex;
>         wait_queue_head_t wq; /* Wait on here when we're waiting for the
> chip
> 
> 
> Which printed(when error occurred):
> MTD get_chip(): chip not ready after erase suspend, block_addr:0x3480000,
> block_mask:0x3480000, adr:0x3a7da84,
> suspends:319941, status:0x28
> 
> 
>  Jocke

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH] mtd: cfi_cmdset_0002: Change erase functions to retry for error
  2018-05-09 10:20         ` IKEGAMI Tokunori
@ 2018-05-09 10:26           ` IKEGAMI Tokunori
  2018-05-09 11:45             ` Boris Brezillon
  0 siblings, 1 reply; 13+ messages in thread
From: IKEGAMI Tokunori @ 2018-05-09 10:26 UTC (permalink / raw)
  To: IKEGAMI Tokunori, Joakim Tjernlund, boris.brezillon
  Cc: boris.brezillon, richard, marek.vasut, PACKHAM Chris, linux-mtd,
	cyrille.pitchen, computersforpeace, dwmw2

From: Tokunori Ikegami <ikegami@allied-telesis.co.jp>

For the word write functions it is retried for error.
But it is not implemented to retry for the erase functions.
To make sure for the erase functions change to retry as same.

This is needed to prevent the flash erase error caused only once.
It was caused by the error case of chip_good() in the do_erase_oneblock().
Also it was confirmed on the MACRONIX flash device MX29GL512FHT2I-11G.
But the error issue behavior is not able to reproduce at this moment.
The flash controller is parallel Flash interface integrated on BCM53003.

Signed-off-by: Tokunori Ikegami <ikegami@allied-telesis.co.jp>
Cc: Chris Packham <chris.packham@alliedtelesis.co.nz>
Cc: Brian Norris <computersforpeace@gmail.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Boris Brezillon <boris.brezillon@free-electrons.com>
Cc: Marek Vasut <marek.vasut@gmail.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Cyrille Pitchen <cyrille.pitchen@wedev4u.fr>
Cc: linux-mtd@lists.infradead.org
---
 drivers/mtd/chips/cfi_cmdset_0002.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c
index 56aa6b75213d..f522a856526a 100644
--- a/drivers/mtd/chips/cfi_cmdset_0002.c
+++ b/drivers/mtd/chips/cfi_cmdset_0002.c
@@ -43,6 +43,7 @@
 #define FORCE_WORD_WRITE 0
 
 #define MAX_WORD_RETRIES 3
+#define MAX_ERASE_RETRY 3
 
 #define SST49LF004B	        0x0060
 #define SST49LF040B	        0x0050
@@ -2240,6 +2241,7 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip)
 	unsigned long int adr;
 	DECLARE_WAITQUEUE(wait, current);
 	int ret = 0;
+	int retry_cnt = 0;
 
 	adr = cfi->addr_unlock1;
 
@@ -2257,6 +2259,7 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip)
 	ENABLE_VPP(map);
 	xip_disable(map, chip, adr);
 
+ retry:
 	cfi_send_gen_cmd(0xAA, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL);
 	cfi_send_gen_cmd(0x55, cfi->addr_unlock2, chip->start, map, cfi, cfi->device_type, NULL);
 	cfi_send_gen_cmd(0x80, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL);
@@ -2310,6 +2313,9 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip)
 		map_write( map, CMD(0xF0), chip->start );
 		/* FIXME - should have reset delay before continuing */
 
+		if (++retry_cnt <= MAX_ERASE_RETRY)
+			goto retry;
+
 		ret = -EIO;
 	}
 
@@ -2329,6 +2335,7 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip,
 	unsigned long timeo = jiffies + HZ;
 	DECLARE_WAITQUEUE(wait, current);
 	int ret = 0;
+	int retry_cnt = 0;
 
 	adr += chip->start;
 
@@ -2346,6 +2353,7 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip,
 	ENABLE_VPP(map);
 	xip_disable(map, chip, adr);
 
+ retry:
 	cfi_send_gen_cmd(0xAA, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL);
 	cfi_send_gen_cmd(0x55, cfi->addr_unlock2, chip->start, map, cfi, cfi->device_type, NULL);
 	cfi_send_gen_cmd(0x80, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL);
@@ -2402,6 +2410,9 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip,
 		map_write( map, CMD(0xF0), chip->start );
 		/* FIXME - should have reset delay before continuing */
 
+		if (++retry_cnt <= MAX_ERASE_RETRY)
+			goto retry;
+
 		ret = -EIO;
 	}
 
-- 
2.16.1

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH] mtd: cfi_cmdset_0002: Change erase functions to retry for error
  2018-05-09 10:26           ` IKEGAMI Tokunori
@ 2018-05-09 11:45             ` Boris Brezillon
  2018-05-09 13:15               ` IKEGAMI Tokunori
  0 siblings, 1 reply; 13+ messages in thread
From: Boris Brezillon @ 2018-05-09 11:45 UTC (permalink / raw)
  To: IKEGAMI Tokunori
  Cc: Joakim Tjernlund, boris.brezillon, richard, marek.vasut,
	PACKHAM Chris, linux-mtd, cyrille.pitchen, computersforpeace,
	dwmw2

On Wed, 9 May 2018 10:26:22 +0000
IKEGAMI Tokunori <ikegami@allied-telesis.co.jp> wrote:

> From: Tokunori Ikegami <ikegami@allied-telesis.co.jp>
> 
> For the word write functions it is retried for error.
> But it is not implemented to retry for the erase functions.
> To make sure for the erase functions change to retry as same.
> 
> This is needed to prevent the flash erase error caused only once.
> It was caused by the error case of chip_good() in the do_erase_oneblock().
> Also it was confirmed on the MACRONIX flash device MX29GL512FHT2I-11G.
> But the error issue behavior is not able to reproduce at this moment.

Sorry, but until you've figured out what was going on here, I'm
reluctant to apply this patch. Are you even sure it fixes the problem
you had? Hard to tell since you're not able to reproduce the issue.

> The flash controller is parallel Flash interface integrated on BCM53003.
> 
> Signed-off-by: Tokunori Ikegami <ikegami@allied-telesis.co.jp>
> Cc: Chris Packham <chris.packham@alliedtelesis.co.nz>
> Cc: Brian Norris <computersforpeace@gmail.com>
> Cc: David Woodhouse <dwmw2@infradead.org>
> Cc: Boris Brezillon <boris.brezillon@free-electrons.com>
> Cc: Marek Vasut <marek.vasut@gmail.com>
> Cc: Richard Weinberger <richard@nod.at>
> Cc: Cyrille Pitchen <cyrille.pitchen@wedev4u.fr>
> Cc: linux-mtd@lists.infradead.org
> ---
>  drivers/mtd/chips/cfi_cmdset_0002.c | 11 +++++++++++
>  1 file changed, 11 insertions(+)
> 
> diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c
> index 56aa6b75213d..f522a856526a 100644
> --- a/drivers/mtd/chips/cfi_cmdset_0002.c
> +++ b/drivers/mtd/chips/cfi_cmdset_0002.c
> @@ -43,6 +43,7 @@
>  #define FORCE_WORD_WRITE 0
>  
>  #define MAX_WORD_RETRIES 3
> +#define MAX_ERASE_RETRY 3

You renamed it MAX_RETRY in a previous patch. Better send patches in
the same patch series when there are dependencies.

>  
>  #define SST49LF004B	        0x0060
>  #define SST49LF040B	        0x0050
> @@ -2240,6 +2241,7 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip)
>  	unsigned long int adr;
>  	DECLARE_WAITQUEUE(wait, current);
>  	int ret = 0;
> +	int retry_cnt = 0;
>  
>  	adr = cfi->addr_unlock1;
>  
> @@ -2257,6 +2259,7 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip)
>  	ENABLE_VPP(map);
>  	xip_disable(map, chip, adr);
>  
> + retry:
>  	cfi_send_gen_cmd(0xAA, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL);
>  	cfi_send_gen_cmd(0x55, cfi->addr_unlock2, chip->start, map, cfi, cfi->device_type, NULL);
>  	cfi_send_gen_cmd(0x80, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL);
> @@ -2310,6 +2313,9 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip)
>  		map_write( map, CMD(0xF0), chip->start );
>  		/* FIXME - should have reset delay before continuing */
>  
> +		if (++retry_cnt <= MAX_ERASE_RETRY)
> +			goto retry;
> +
>  		ret = -EIO;
>  	}
>  
> @@ -2329,6 +2335,7 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip,
>  	unsigned long timeo = jiffies + HZ;
>  	DECLARE_WAITQUEUE(wait, current);
>  	int ret = 0;
> +	int retry_cnt = 0;
>  
>  	adr += chip->start;
>  
> @@ -2346,6 +2353,7 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip,
>  	ENABLE_VPP(map);
>  	xip_disable(map, chip, adr);
>  
> + retry:
>  	cfi_send_gen_cmd(0xAA, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL);
>  	cfi_send_gen_cmd(0x55, cfi->addr_unlock2, chip->start, map, cfi, cfi->device_type, NULL);
>  	cfi_send_gen_cmd(0x80, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL);
> @@ -2402,6 +2410,9 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip,
>  		map_write( map, CMD(0xF0), chip->start );
>  		/* FIXME - should have reset delay before continuing */
>  
> +		if (++retry_cnt <= MAX_ERASE_RETRY)
> +			goto retry;
> +
>  		ret = -EIO;
>  	}
>  

^ permalink raw reply	[flat|nested] 13+ messages in thread

* RE: [PATCH] mtd: cfi_cmdset_0002: Change erase functions to retry for error
  2018-05-09 11:45             ` Boris Brezillon
@ 2018-05-09 13:15               ` IKEGAMI Tokunori
  0 siblings, 0 replies; 13+ messages in thread
From: IKEGAMI Tokunori @ 2018-05-09 13:15 UTC (permalink / raw)
  To: Boris Brezillon
  Cc: Joakim Tjernlund, boris.brezillon, richard, marek.vasut,
	PACKHAM Chris, linux-mtd, cyrille.pitchen, computersforpeace,
	dwmw2

Hi Boris-san,

Thank you so much for the comments.

> Sorry, but until you've figured out what was going on here, I'm
> reluctant to apply this patch. Are you even sure it fixes the problem
> you had? Hard to tell since you're not able to reproduce the issue.

  Okay I will do retry to reproduce the issue again.
  But probably it may take a little time to reproduce the issue.

> You renamed it MAX_RETRY in a previous patch. Better send patches in
> the same patch series when there are dependencies.

  I see about this.

By the way mainly I would like to fix the patch below for the write error issue.
  <http://lists.infradead.org/pipermail/linux-mtd/2018-May/080794.html>

The sender address of patch is not good so I will do send the patch again.
So please review the patch also later.

Regards,
Ikegami

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2018-05-09 13:16 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-05-08 10:20 [PATCH] mtd: cfi_cmdset_0002: Change erase functions to retry for error IKEGAMI Tokunori
2018-05-08 11:20 ` Boris Brezillon
2018-05-08 14:15   ` IKEGAMI Tokunori
2018-05-08 16:52   ` IKEGAMI Tokunori
2018-05-08 17:11     ` Boris Brezillon
2018-05-08 17:35       ` IKEGAMI Tokunori
2018-05-08 18:02         ` Boris Brezillon
2018-05-09  0:27           ` IKEGAMI Tokunori
2018-05-09  8:18       ` Joakim Tjernlund
2018-05-09 10:20         ` IKEGAMI Tokunori
2018-05-09 10:26           ` IKEGAMI Tokunori
2018-05-09 11:45             ` Boris Brezillon
2018-05-09 13:15               ` IKEGAMI Tokunori

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.