linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 3/4] mtd: nand: Add support for Evatronix NANDFLASH-CTRL
@ 2016-06-02  7:48 Ricard Wanderlof
  2016-06-03 14:59 ` Boris Brezillon
  0 siblings, 1 reply; 18+ messages in thread
From: Ricard Wanderlof @ 2016-06-02  7:48 UTC (permalink / raw)
  To: Brian Norris, David Woodhouse, Benoit Cousson, Tony Lindgren,
	David Woodhouse
  Cc: Linux mtd, devicetree, linux-kernel


Driver for the Evatronix NANDFLASH-CTRL NAND flash controller IP. This
controller is used in the Axis ARTPEC-6 SoC.

The driver supports BCH ECC using the controller's hardware, but there is
also an option to use software BCH ECC. However, the ECC layouts are not
compatible so it's not possible to mix them. The main advantage to using
software ECC is that there are more OOB bytes free, as the hardware is
slightly wasteful on OOB space.

BCH ECC from 4 to 32 bits over 256, 512 or 1024 byte ECC blocks is supported.

Only large-page flash chips are supported, using 4 or 5 address cycles.

The driver has been extensively tested using hardware ECC on 2 Mbit flash chips,
with 8 bit ECC over 512 bytes ECC blocks.

Signed-off-by: Ricard Wanderlof <ricardw@axis.com>
---
 drivers/mtd/nand/Kconfig          |    6 +
 drivers/mtd/nand/Makefile         |    1 +
 drivers/mtd/nand/evatronix_nand.c | 1909 +++++++++++++++++++++++++++++++++++++
 3 files changed, 1916 insertions(+)
 create mode 100644 drivers/mtd/nand/evatronix_nand.c

diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index f05e0e9..30fba73 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -295,6 +295,12 @@ config MTD_NAND_DOCG4
 	  by the block containing the saftl partition table.  This is probably
 	  typical.
 
+config MTD_NAND_EVATRONIX
+	tristate "Enable Evatronix NANDFLASH-CTRL driver"
+	help
+	  NAND hardware driver for Evatronix NANDFLASH-CTRL
+	  NAND flash controller.
+
 config MTD_NAND_SHARPSL
 	tristate "Support for NAND Flash on Sharp SL Series (C7xx + others)"
 	depends on ARCH_PXA
diff --git a/drivers/mtd/nand/Makefile b/drivers/mtd/nand/Makefile
index f553353..ac89b12 100644
--- a/drivers/mtd/nand/Makefile
+++ b/drivers/mtd/nand/Makefile
@@ -19,6 +19,7 @@ obj-$(CONFIG_MTD_NAND_S3C2410)		+= s3c2410.o
 obj-$(CONFIG_MTD_NAND_DAVINCI)		+= davinci_nand.o
 obj-$(CONFIG_MTD_NAND_DISKONCHIP)	+= diskonchip.o
 obj-$(CONFIG_MTD_NAND_DOCG4)		+= docg4.o
+obj-$(CONFIG_MTD_NAND_EVATRONIX)	+= evatronix_nand.o
 obj-$(CONFIG_MTD_NAND_FSMC)		+= fsmc_nand.o
 obj-$(CONFIG_MTD_NAND_SHARPSL)		+= sharpsl.o
 obj-$(CONFIG_MTD_NAND_NANDSIM)		+= nandsim.o
diff --git a/drivers/mtd/nand/evatronix_nand.c b/drivers/mtd/nand/evatronix_nand.c
new file mode 100644
index 0000000..94eb582
--- /dev/null
+++ b/drivers/mtd/nand/evatronix_nand.c
@@ -0,0 +1,1909 @@
+/*
+ * evatronix_nand.c - NAND Flash Driver for Evatronix NANDFLASH-CTRL
+ * NAND Flash Controller IP.
+ *
+ * Intended to handle one NFC, with up to two connected NAND flash chips,
+ * one per bank.
+ *
+ * This implementation has been designed against Rev 1.15 and Rev 1.16 of the
+ * NANDFLASH-CTRL Design Specification.
+ * Note that Rev 1.15 specifies up to 8 chip selects, whereas Rev 1.16
+ * only specifies one. We keep the definitions for the multiple chip
+ * selects though for future reference.
+ *
+ * The corresponding IP version is NANDFLASH-CTRL-DES-6V09H02RE08 .
+ *
+ * Copyright (c) 2016 Axis Communication AB, Lund, Sweden.
+ * Portions Copyright (c) 2010 ST Microelectronics
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <asm/dma.h>
+#include <linux/bitops.h> /* for ffs() */
+#include <linux/io.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/nand.h>
+#include <linux/mtd/concat.h>
+#include <linux/mtd/partitions.h>
+#include <linux/version.h>
+
+/* Driver configuration */
+
+/* Some of this could potentially be moved to DT, but it represents stuff
+ * that is either untested, only used for debugging, or things we really
+ * don't want anyone to change, so we keep it here until a clear use case
+ * emerges.
+ */
+
+#undef NFC_DMA64BIT /* NFC hardware support for 64-bit DMA transfers */
+
+#undef POLLED_XFERS /* Use polled rather than interrupt based transfers */
+
+#undef CLEAR_DMA_BUF_AFTER_WRITE /* Useful for debugging */
+
+/* DMA buffer for page transfers. */
+#define DMA_BUF_SIZE (8192 + 640) /* main + spare for 8k page flash */
+
+/* # bytes into the OOB we put our ECC */
+#define ECC_OFFSET 2
+
+/* Number of bytes that we read using READID command.
+ * When reading IDs the IP requires us set up the number of bytes to read
+ * prior to executing the operation, whereas the NAND subsystem would rather
+ * like us to be able to read one byte at a time from the chip. So we fake
+ * this by reading a set number of ID bytes, and then let the NAND subsystem
+ * read from our DMA buffer.
+ */
+#define READID_LENGTH 8
+
+/* Debugging */
+
+#define MTD_TRACE(FORMAT, ...) pr_debug("%s: " FORMAT, __func__, ## __VA_ARGS__)
+
+/* Register offsets for Evatronix NANDFLASH-CTRL IP */
+
+/* Register field shift values and masks are interespersed as it makes
+ * them easier to locate.
+ *
+ * We use shift values rather than direct masks (e.g. 0x0000d000), as the
+ * hardware manual lists the bit number, making the definitions below
+ * easier to verify against the manual.
+ *
+ * All (known) registers are here, but we only put in the bit fields
+ * for the fields we need.
+ *
+ * We try to be consistent regarding _SIZE/_MASK/_value macros so as to
+ * get a consistent layout here, except for trivial cases where there is
+ * only a single bit or field in a register at bit offset 0.
+ */
+
+#define COMMAND_REG		0x00
+/* The masks reflect the input data to the MAKE_COMMAND macro, rather than
+ * the bits in the register itself. These macros are not intended to be
+ * used by the user, who should use the MAKE_COMMAND et al macros.
+ */
+#define _CMD_SEQ_SHIFT			0
+#define _INPUT_SEL_SHIFT		6
+#define _DATA_SEL_SHIFT			7
+#define _CMD_0_SHIFT			8
+#define _CMD_1_3_SHIFT			16
+#define _CMD_2_SHIFT			24
+
+#define _CMD_SEQ_MASK			0x3f
+#define _INPUT_SEL_MASK			1
+#define _DATA_SEL_MASK			1
+#define _CMD_MASK			0xff /* for all CMD_foo */
+
+#define MAKE_COMMAND(CMD_SEQ, INPUT_SEL, DATA_SEL, CMD_0, CMD_1_3, CMD_2) \
+	((((CMD_SEQ)	& _CMD_SEQ_MASK)	<< _CMD_SEQ_SHIFT)	| \
+	 (((INPUT_SEL)	& _INPUT_SEL_MASK)	<< _INPUT_SEL_SHIFT)	| \
+	 (((DATA_SEL)	& _DATA_SEL_MASK)	<< _DATA_SEL_SHIFT)	| \
+	 (((CMD_0)	& _CMD_MASK)		<< _CMD_0_SHIFT)	| \
+	 (((CMD_1_3)	& _CMD_MASK)		<< _CMD_1_3_SHIFT)	| \
+	 (((CMD_2)	& _CMD_MASK)		<< _CMD_2_SHIFT))
+
+#define INPUT_SEL_SIU			0
+#define INPUT_SEL_DMA			1
+#define DATA_SEL_FIFO			0
+#define DATA_SEL_DATA_REG		1
+
+#define CONTROL_REG		0x04
+#define CONTROL_BLOCK_SIZE_32		(0 << 6)
+#define CONTROL_BLOCK_SIZE_64		(1 << 6)
+#define CONTROL_BLOCK_SIZE_128		(2 << 6)
+#define CONTROL_BLOCK_SIZE_256		(3 << 6)
+#define CONTROL_BLOCK_SIZE(SIZE)	((ffs(SIZE) - 6) << 6)
+#define CONTROL_ECC_EN			(1 << 5)
+#define CONTROL_INT_EN			(1 << 4)
+#define CONTROL_ECC_BLOCK_SIZE_256	(0 << 1)
+#define CONTROL_ECC_BLOCK_SIZE_512	(1 << 1)
+#define CONTROL_ECC_BLOCK_SIZE_1024	(2 << 1)
+#define CONTROL_ECC_BLOCK_SIZE(SIZE)	((ffs(SIZE) - 9) << 1)
+#define STATUS_REG		0x08
+#define STATUS_MEM_ST(CS)		(1 << (CS))
+#define STATUS_CTRL_STAT		(1 << 8)
+#define STATUS_MASK_REG		0x0C
+#define STATE_MASK_SHIFT		0
+#define STATUS_MASK_STATE_MASK(MASK)	(((MASK) & 0xff) << STATE_MASK_SHIFT)
+#define ERROR_MASK_SHIFT		8
+#define STATUS_MASK_ERROR_MASK(MASK)	(((MASK) & 0xff) << ERROR_MASK_SHIFT)
+#define INT_MASK_REG		0x10
+#define INT_MASK_ECC_INT_EN(CS)		(1 << (24 + (CS)))
+#define INT_MASK_STAT_ERR_INT_EN(CS)	(1 << (16 + (CS)))
+#define INT_MASK_MEM_RDY_INT_EN(CS)	(1 << (8 + (CS)))
+#define INT_MASK_DMA_INT_EN		(1 << 3)
+#define INT_MASK_DATA_REG_EN		(1 << 2)
+#define INT_MASK_CMD_END_INT_EN		(1 << 1)
+#define INT_STATUS_REG		0x14
+#define INT_STATUS_ECC_INT_FL(CS)	(1 << (24 + (CS)))
+#define INT_STATUS_STAT_ERR_INT_FL(CS)	(1 << (16 + (CS)))
+#define INT_STATUS_MEM_RDY_INT_FL(CS)	(1 << (8 + (CS)))
+#define INT_STATUS_DMA_INT_FL		(1 << 3)
+#define INT_STATUS_DATA_REG_FL		(1 << 2)
+#define INT_STATUS_CMD_END_INT_FL	(1 << 1)
+#define ECC_CTRL_REG		0x18
+#define ECC_CTRL_ECC_CAP_2		(0 << 0)
+#define ECC_CTRL_ECC_CAP_4		(1 << 0)
+#define ECC_CTRL_ECC_CAP_8		(2 << 0)
+#define ECC_CTRL_ECC_CAP_16		(3 << 0)
+#define ECC_CTRL_ECC_CAP_24		(4 << 0)
+#define ECC_CTRL_ECC_CAP_32		(5 << 0)
+#define ECC_CTRL_ECC_CAP(B)		((B) < 24 ? ffs(B) - 2 : (B) / 6)
+/* # ECC corrections that are acceptable during read before setting OVER flag */
+#define ECC_CTRL_ECC_THRESHOLD(VAL)	(((VAL) & 0x3f) << 8)
+#define ECC_OFFSET_REG		0x1C
+#define ECC_STAT_REG		0x20
+/* Correctable error flag(s) */
+#define ECC_STAT_ERROR(CS)		(1 << (0 + (CS)))
+/* Uncorrectable error flag(s) */
+#define ECC_STAT_UNC(CS)		(1 << (8 + (CS)))
+/* Acceptable errors level overflow flag(s) */
+#define ECC_STAT_OVER(CS)		(1 << (16 + (CS)))
+#define ADDR0_COL_REG		0x24
+#define ADDR0_ROW_REG		0x28
+#define ADDR1_COL_REG		0x2C
+#define ADDR1_ROW_REG		0x30
+#define PROTECT_REG		0x34
+#define FIFO_DATA_REG		0x38
+#define DATA_REG_REG		0x3C
+#define DATA_REG_SIZE_REG	0x40
+#define DATA_REG_SIZE_DATA_REG_SIZE(SIZE) (((SIZE) - 1) & 3)
+#define DEV0_PTR_REG		0x44
+#define DEV1_PTR_REG		0x48
+#define DEV2_PTR_REG		0x4C
+#define DEV3_PTR_REG		0x50
+#define DEV4_PTR_REG		0x54
+#define DEV5_PTR_REG		0x58
+#define DEV6_PTR_REG		0x5C
+#define DEV7_PTR_REG		0x60
+#define DMA_ADDR_L_REG		0x64
+#define DMA_ADDR_H_REG		0x68
+#define DMA_CNT_REG		0x6C
+#define DMA_CTRL_REG		0x70
+#define DMA_CTRL_DMA_START		(1 << 7) /* start on command */
+#define DMA_CTRL_DMA_MODE_SG		(1 << 5) /* scatter/gather mode */
+#define DMA_CTRL_DMA_BURST_I_P_4	(0 << 2) /* incr. precise burst */
+#define DMA_CTRL_DMA_BURST_S_P_16	(1 << 2) /* stream precise burst */
+#define DMA_CTRL_DMA_BURST_SINGLE	(2 << 2) /* single transfer */
+#define DMA_CTRL_DMA_BURST_UNSPEC	(3 << 2) /* burst of unspec. length */
+#define DMA_CTRL_DMA_BURST_I_P_8	(4 << 2) /* incr. precise burst */
+#define DMA_CTRL_DMA_BURST_I_P_16	(5 << 2) /* incr. precise burst */
+#define DMA_CTRL_ERR_FLAG		(1 << 1) /* read only */
+#define DMA_CTRL_DMA_READY		(1 << 0) /* read only */
+#define BBM_CTRL_REG		0x74
+#define MEM_CTRL_REG		0x80
+#define MEM_CTRL_MEM_CE(CE)		(((CE) & 7) << 0)
+#define MEM_CTRL_BANK_SEL(BANK)		(((BANK) & 7) << 16)
+#define MEM_CTRL_MEM0_WR	BIT(8)
+#define DATA_SIZE_REG		0x84
+#define TIMINGS_ASYN_REG	0x88
+#define TIMINGS_SYN_REG		0x8C
+#define TIME_SEQ_0_REG		0x90
+#define TIME_SEQ_1_REG		0x94
+#define TIME_GEN_SEQ_0_REG	0x98
+#define TIME_GEN_SEQ_1_REG	0x9C
+#define TIME_GEN_SEQ_2_REG	0xA0
+#define FIFO_INIT_REG		0xB0
+#define FIFO_INIT_FIFO_INIT			1 /* Flush FIFO */
+#define FIFO_STATE_REG		0xB4
+#define FIFO_STATE_DF_W_EMPTY		(1 << 7)
+#define FIFO_STATE_DF_R_FULL		(1 << 6)
+#define FIFO_STATE_CF_ACCPT_W		(1 << 5)
+#define FIFO_STATE_CF_ACCPT_R		(1 << 4)
+#define FIFO_STATE_CF_FULL		(1 << 3)
+#define FIFO_STATE_CF_EMPTY		(1 << 2)
+#define FIFO_STATE_DF_W_FULL		(1 << 1)
+#define FIFO_STATE_DF_R_EMPTY		(1 << 0)
+#define GEN_SEQ_CTRL_REG	0xB8		/* aka GENERIC_SEQ_CTRL */
+#define _CMD0_EN_SHIFT			0
+#define _CMD1_EN_SHIFT			1
+#define _CMD2_EN_SHIFT			2
+#define _CMD3_EN_SHIFT			3
+#define _COL_A0_SHIFT			4
+#define _COL_A1_SHIFT			6
+#define _ROW_A0_SHIFT			8
+#define _ROW_A1_SHIFT			10
+#define _DATA_EN_SHIFT			12
+#define _DELAY_EN_SHIFT			13
+#define _IMD_SEQ_SHIFT			15
+#define _CMD3_SHIFT			16
+#define ECC_CNT_REG		0x14C
+#define ECC_CNT_ERR_LVL_MASK		0x3F
+
+#define _CMD0_EN_MASK			1
+#define _CMD1_EN_MASK			1
+#define _CMD2_EN_MASK			1
+#define _CMD3_EN_MASK			1
+#define _COL_A0_MASK			3
+#define _COL_A1_MASK			3
+#define _ROW_A0_MASK			3
+#define _ROW_A1_MASK			3
+#define _DATA_EN_MASK			1
+#define _DELAY_EN_MASK			3
+#define _IMD_SEQ_MASK			1
+#define _CMD3_MASK			0xff
+
+/* DELAY_EN field values, non-shifted */
+#define _BUSY_NONE			0
+#define _BUSY_0				1
+#define _BUSY_1				2
+
+/* Slightly confusingly, the DELAYx_EN fields enable BUSY phases. */
+#define MAKE_GEN_CMD(CMD0_EN, CMD1_EN, CMD2_EN, CMD3_EN, \
+		     COL_A0, ROW_A0, COL_A1, ROW_A1, \
+		     DATA_EN, BUSY_EN, IMMEDIATE_SEQ, CMD3) \
+	((((CMD0_EN)	& _CMD0_EN_MASK)	<< _CMD0_EN_SHIFT)	| \
+	 (((CMD1_EN)	& _CMD1_EN_MASK)	<< _CMD1_EN_SHIFT)	| \
+	 (((CMD2_EN)	& _CMD2_EN_MASK)	<< _CMD2_EN_SHIFT)	| \
+	 (((CMD3_EN)	& _CMD3_EN_MASK)	<< _CMD3_EN_SHIFT)	| \
+	 (((COL_A0)	& _COL_A0_MASK)		<< _COL_A0_SHIFT)	| \
+	 (((COL_A1)	& _COL_A1_MASK)		<< _COL_A1_SHIFT)	| \
+	 (((ROW_A0)	& _ROW_A0_MASK)		<< _ROW_A0_SHIFT)	| \
+	 (((ROW_A1)	& _ROW_A1_MASK)		<< _ROW_A1_SHIFT)	| \
+	 (((DATA_EN)	& _DATA_EN_MASK)	<< _DATA_EN_SHIFT)	| \
+	 (((BUSY_EN)	& _DELAY_EN_MASK)	<< _DELAY_EN_SHIFT)	| \
+	 (((IMMEDIATE_SEQ) & _IMD_SEQ_MASK)	<< _IMD_SEQ_SHIFT)	| \
+	 (((CMD3)	& _CMD3_MASK)		<< _CMD3_SHIFT))
+
+/* The sequence encodings are not trivial. The ones we use are listed here. */
+#define _SEQ_0			0x00 /* send one cmd, then wait for ready */
+#define _SEQ_1			0x21 /* send one cmd, one addr, fetch data */
+#define _SEQ_2			0x22 /* send one cmd, one addr, fetch data */
+#define _SEQ_4			0x24 /* single cycle write then read */
+#define _SEQ_10			0x2A /* read page */
+#define _SEQ_12			0x0C /* write page, don't wait for R/B */
+#define _SEQ_18			0x32 /* read page using general cycle */
+#define _SEQ_19			0x13 /* write page using general cycle */
+#define _SEQ_14			0x0E /* 3 address cycles, for block erase */
+
+#define MLUN_REG		0xBC
+#define DEV0_SIZE_REG		0xC0
+#define DEV1_SIZE_REG		0xC4
+#define DEV2_SIZE_REG		0xC8
+#define DEV3_SIZE_REG		0xCC
+#define DEV4_SIZE_REG		0xD0
+#define DEV5_SIZE_REG		0xD4
+#define DEV6_SIZE_REG		0xD8
+#define DEV7_SIZE_REG		0xDC
+#define SS_CCNT0_REG		0xE0
+#define SS_CCNT1_REG		0xE4
+#define SS_SCNT_REG		0xE8
+#define SS_ADDR_DEV_CTRL_REG	0xEC
+#define SS_CMD0_REG		0xF0
+#define SS_CMD1_REG		0xF4
+#define SS_CMD2_REG		0xF8
+#define SS_CMD3_REG		0xFC
+#define SS_ADDR_REG		0x100
+#define SS_MSEL_REG		0x104
+#define SS_REQ_REG		0x108
+#define SS_BRK_REG		0x10C
+#define DMA_TLVL_REG		0x114
+#define DMA_TLVL_MAX		0xFF
+#define AES_CTRL_REG		0x118
+#define AES_DATAW_REG		0x11C
+#define AES_SVECT_REG		0x120
+#define CMD_MARK_REG		0x124
+#define LUN_STATUS_0_REG	0x128
+#define LUN_STATUS_1_REG	0x12C
+#define TIMINGS_TOGGLE_REG	0x130
+#define TIME_GEN_SEQ_3_REG	0x134
+#define SQS_DELAY_REG		0x138
+#define CNE_MASK_REG		0x13C
+#define CNE_VAL_REG		0x140
+#define CNA_CTRL_REG		0x144
+#define INTERNAL_STATUS_REG	0x148
+#define ECC_CNT_REG		0x14C
+#define PARAM_REG_REG		0x150
+
+/* NAND flash command generation */
+
+/* NAND flash command codes */
+#define NAND_RESET		0xff
+#define NAND_READ_STATUS	0x70
+#define NAND_READ_ID		0x90
+#define NAND_READ_ID_ADDR_STD	0x00	/* address written to ADDR0_COL */
+#define NAND_READ_ID_ADDR_ONFI	0x20	/* address written to ADDR0_COL */
+#define NAND_READ_ID_ADDR_JEDEC	0x40	/* address written to ADDR0_COL */
+#define NAND_PARAM		0xEC
+#define NAND_PARAM_SIZE_MAX		768 /* bytes */
+#define NAND_PAGE_READ		0x00
+#define NAND_PAGE_READ_END	0x30
+#define NAND_BLOCK_ERASE	0x60
+#define NAND_BLOCK_ERASE_END	0xd0
+#define NAND_PAGE_WRITE		0x80
+#define NAND_PAGE_WRITE_END	0x10
+
+#define _DONT_CARE 0x00 /* When we don't have anything better to say */
+
+
+/* Assembled values for putting into COMMAND register */
+
+/* Reset NAND flash */
+
+/* Uses SEQ_0: non-directional sequence, single command, wait for ready */
+#define COMMAND_RESET \
+	MAKE_COMMAND(_SEQ_0, INPUT_SEL_SIU, DATA_SEL_FIFO, \
+		NAND_RESET, _DONT_CARE, _DONT_CARE)
+
+/* Read status */
+
+/* Uses SEQ_4: single command, then read data via DATA_REG */
+#define COMMAND_READ_STATUS \
+	MAKE_COMMAND(_SEQ_4, INPUT_SEL_SIU, DATA_SEL_DATA_REG, \
+		NAND_READ_STATUS, _DONT_CARE, _DONT_CARE)
+
+/* Read ID */
+
+/* Uses SEQ_1: single command, ADDR0_COL, then read data via FIFO */
+/* ADDR0_COL is set to NAND_READ_ID_ADDR_STD for non-ONFi, and
+ * NAND_READ_ID_ADDR_ONFI for ONFi.
+ * The controller reads 5 bytes in the non-ONFi case, and 4 bytes in the
+ * ONFi case, so the data reception (DMA or FIFO_REG) needs to be set up
+ * accordingly.
+ */
+#define COMMAND_READ_ID \
+	MAKE_COMMAND(_SEQ_1, INPUT_SEL_DMA, DATA_SEL_FIFO, \
+		NAND_READ_ID, _DONT_CARE, _DONT_CARE)
+
+#define COMMAND_PARAM \
+	MAKE_COMMAND(_SEQ_2, INPUT_SEL_DMA, DATA_SEL_FIFO, \
+		NAND_PARAM, _DONT_CARE, _DONT_CARE)
+
+/* Page read via slave interface (FIFO_DATA register) */
+
+/* Standard 5-cycle read command, with 0x30 end-of-cycle marker */
+/* Uses SEQ_10: CMD0 + 5 address cycles + CMD2, read data */
+#define COMMAND_READ_PAGE_STD \
+	MAKE_COMMAND(_SEQ_10, INPUT_SEL_SIU, DATA_SEL_FIFO, \
+		NAND_PAGE_READ, _DONT_CARE, NAND_PAGE_READ_END)
+
+/* 4-cycle read command, together with GEN_SEQ_CTRL_READ_PAGE_4CYCLE */
+/* Uses SEQ_18 (generic command sequence, see GEN_SEQ_ECTRL_READ_PAGE_4CYCLE)):
+ * CMD0 + 2+2 address cycles + CMD2, read data
+ */
+#define COMMAND_READ_PAGE_GEN \
+	MAKE_COMMAND(_SEQ_18, INPUT_SEL_SIU, DATA_SEL_FIFO, \
+		NAND_PAGE_READ, _DONT_CARE, NAND_PAGE_READ_END)
+
+/* Page read via master interface (DMA) */
+
+/* Standard 5-cycle read command, with 0x30 end-of-cycle marker */
+/* Uses SEQ_10: CMD0 + 5 address cycles + CMD2, read data */
+#define COMMAND_READ_PAGE_DMA_STD \
+	MAKE_COMMAND(_SEQ_10, INPUT_SEL_DMA, DATA_SEL_FIFO, \
+		NAND_PAGE_READ, _DONT_CARE, NAND_PAGE_READ_END)
+
+/* 4-cycle read command, together with GEN_SEQ_CTRL_READ_PAGE_4CYCLE */
+/* Uses SEQ_18 (generic command sequence, see GEN_SEQ_ECTRL_READ_PAGE_4CYCLE)):
+ * CMD0 + 2+2 address cycles + CMD2, read data
+ */
+#define COMMAND_READ_PAGE_DMA_GEN \
+	MAKE_COMMAND(_SEQ_18, INPUT_SEL_DMA, DATA_SEL_FIFO, \
+		NAND_PAGE_READ, _DONT_CARE, NAND_PAGE_READ_END)
+
+/* Page write via master interface (DMA) */
+
+/* Uses SEQ_12: CMD0 + 5 address cycles + write data + CMD1 */
+#define COMMAND_WRITE_PAGE_DMA_STD \
+	MAKE_COMMAND(_SEQ_12, INPUT_SEL_DMA, DATA_SEL_FIFO, \
+		NAND_PAGE_WRITE, NAND_PAGE_WRITE_END, _DONT_CARE)
+
+/* Uses SEQ_19: CMD0 + 4 address cycles + write data + CMD1 */
+#define COMMAND_WRITE_PAGE_DMA_GEN \
+	MAKE_COMMAND(_SEQ_19, INPUT_SEL_DMA, DATA_SEL_FIFO, \
+		NAND_PAGE_WRITE, NAND_PAGE_WRITE_END, _DONT_CARE)
+
+/* Block erase */
+
+/* Uses SEQ_14: CMD0 + 3 address cycles + CMD1 */
+#define COMMAND_BLOCK_ERASE \
+	MAKE_COMMAND(_SEQ_14, INPUT_SEL_SIU, DATA_SEL_FIFO, \
+		NAND_BLOCK_ERASE, NAND_BLOCK_ERASE_END, _DONT_CARE)
+
+/* Assembled values for putting into GEN_SEQ_CTRL register */
+
+/* General command sequence specification for 4 cycle PAGE_READ command */
+#define GEN_SEQ_CTRL_READ_PAGE_4CYCLE \
+	MAKE_GEN_CMD(1, 0, 1, 0,	/* enable command 0 and 2 phases */ \
+		     2, 2,		/* col A0 2 cycles, row A0 2 cycles */ \
+		     0, 0,		/* col A1, row A1 not used */ \
+		     1,			/* data phase enabled */ \
+		     _BUSY_0,		/* busy0 phase enabled */ \
+		     0,			/* immediate cmd execution disabled */ \
+		     _DONT_CARE)	/* command 3 code not needed */
+
+/* General command sequence specification for 4 cycle PAGE_PROGRAM command */
+#define GEN_SEQ_CTRL_WRITE_PAGE_4CYCLE \
+	MAKE_GEN_CMD(1, 1, 0, 0,	/* enable command 0 and 1 phases */ \
+		     2, 2,		/* col A0 2 cycles, row A0 2 cycles */ \
+		     0, 0,		/* col A1, row A1 not used */ \
+		     1,			/* data phase enabled */ \
+		     _BUSY_1,		/* busy1 phase enabled */ \
+		     0,			/* immediate cmd execution disabled */ \
+		     _DONT_CARE)	/* command 3 code not needed */
+
+/* BCH ECC size calculations. */
+/* From "Mr. NAND's Wild Ride: Warning: Suprises Ahead", by Robert Pierce,
+ * Denali Software Inc. 2009, table on page 5
+ */
+/* Use 8 bit correction as base. */
+#define ECC8_BYTES(BLKSIZE) (ffs(BLKSIZE) + 3)
+/* The following would be valid for 4..24 bits of correction. */
+#define ECC_BYTES_PACKED(CAP, BLKSIZE) ((ECC8_BYTES(BLKSIZE) * (CAP) + 7) / 8)
+/* Our hardware however requires more bytes than strictly necessary due to
+ * the internal design.
+ */
+#define ECC_BYTES(CAP, BLKSIZE) ((ECC8_BYTES(1024) * (CAP) + 7) / 8)
+
+/* Read modes */
+enum nfc_read_mode {
+	NFC_READ_STD, /* Standard page read with ECC */
+	NFC_READ_RAW, /* Raw mode read of main area without ECC */
+	NFC_READ_OOB, /* Read oob only (no ECC) */
+	NFC_READ_ALL  /* Read main+oob in raw mode (no ECC) */
+};
+
+/* Timing parameters, from DT */
+struct nfc_timings {
+	uint32_t time_seq_0;
+	uint32_t time_seq_1;
+	uint32_t timings_asyn;
+	uint32_t time_gen_seq_0;
+	uint32_t time_gen_seq_1;
+	uint32_t time_gen_seq_2;
+	uint32_t time_gen_seq_3;
+};
+
+/* Configuration, from DT */
+struct nfc_setup {
+	struct nfc_timings timings;
+	bool use_bank_select; /* CE selects 'bank' rather than 'chip' */
+	bool rb_wired_and;    /* Ready/busy wired AND rather than per-chip */
+};
+
+/* DMA buffer, from both software (buf) and hardware (phys) perspective. */
+struct nfc_dma {
+	void *buf; /* mapped address */
+	dma_addr_t phys; /* physical address */
+	int bytes_left; /* how much data left to read from buffer? */
+	int buf_bytes; /* how much allocated data in the buffer? */
+	uint8_t *ptr; /* work pointer */
+};
+
+#ifndef POLLED_XFERS
+/* Interrupt management */
+struct nfc_irq {
+	int done; /* interrupt triggered, consequently we're done. */
+	uint32_t int_status; /* INT_STATUS at time of interrupt */
+	wait_queue_head_t wq; /* For waiting on controller interrupt */
+};
+#endif
+
+/* Information common to all chips, including the NANDFLASH-CTRL IP */
+struct nfc_info {
+	void __iomem *regbase;
+	struct device *dev;
+	struct nand_hw_control *controller;
+	spinlock_t lock;
+	struct nfc_setup *setup;
+	struct nfc_dma dma;
+#ifndef POLLED_XFERS
+	struct nfc_irq irq;
+#endif
+};
+
+/* Per-chip controller configuration */
+struct nfc_config {
+	uint32_t mem_ctrl;
+	uint32_t control;
+	uint32_t ecc_ctrl;
+	uint32_t mem_status_mask;
+	uint32_t cs;
+};
+
+/* Cache for info that we need to save across calls to nfc_command */
+struct nfc_cmd_cache {
+	unsigned int command;
+	int page;
+	int column;
+	int write_size;
+	int oob_required;
+	int write_raw;
+};
+
+/* Information for each physical NAND chip. */
+struct chip_info {
+	struct nand_chip chip;
+	struct nfc_cmd_cache cmd_cache;
+	struct nfc_config nfc_config;
+};
+
+/* What we tell mtd is an mtd_info actually is a complete chip_info */
+#define TO_CHIP_INFO(mtd) ((struct chip_info *)(mtd_to_nand(mtd)))
+
+/* This is a global pointer, as we only support one single instance of the NFC.
+ * For multiple instances, we would need to add nfc_info as a parameter to
+ * several functions, as well as adding it as a member of the chip_info struct.
+ * Since most likely a system would only have one NFC instance, we don't
+ * go all the way implementing that feature now.
+ */
+static struct nfc_info *nfc_info;
+
+/* The timing setup is expected to come via DT. We keep some default timings
+ * here for reference, based on a 100 MHz reference clock.
+ */
+
+static const struct nfc_timings default_mode0_pll_enabled = {
+	0x0d151533, 0x000b0515, 0x00000046,
+	0x00150000, 0x00000000, 0x00000005, 0x00000015 };
+
+/**** Utility routines. */
+
+/* Count the number of 0's in buff up to a max of max_bits */
+/* Used to determine how many bitflips there are in an allegedly erased block */
+static int count_zero_bits(uint8_t *buff, int size, int max_bits)
+{
+	int k, zero_bits = 0;
+
+	for (k = 0; k < size; k++) {
+		zero_bits += hweight8(~buff[k]);
+		if (zero_bits > max_bits)
+			break;
+	}
+
+	return zero_bits;
+}
+
+/**** Low level stuff. Read and write registers, interrupt routine, etc. */
+
+/* Read and write NFC SFR registers */
+
+static uint32_t nfc_read(uint reg_offset)
+{
+	return readl_relaxed(nfc_info->regbase + reg_offset);
+}
+
+static void nfc_write(uint32_t data, uint reg_offset)
+{
+	/* Note: According to NANDFLASH-CTRL Design Specification, rev 1.14,
+	 * p19, the NFC SFR's can only be written when STATUS.CTRL_STAT is 0.
+	 * However, this doesn't seem to be an issue in practice.
+	 */
+	writel_relaxed(data, nfc_info->regbase  + reg_offset);
+}
+
+#ifndef POLLED_XFERS
+static irqreturn_t nfc_irq(int irq, void *device_info)
+{
+	/* Note that device_info = nfc_info, so if we don't want a global
+	 * nfc_info we can get it via device_info.
+	 */
+
+	/* Save interrupt status in case caller wants to check what actually
+	 * happened.
+	 */
+	nfc_info->irq.int_status = nfc_read(INT_STATUS_REG);
+
+	MTD_TRACE("Got interrupt %d, INT_STATUS 0x%08x\n",
+		  irq, nfc_info->irq.int_status);
+
+	/* Note: We can't clear the interrupts by clearing CONTROL.INT_EN,
+	 * as that does not disable the interrupt output port from the
+	 * nfc towards the gic.
+	 */
+	nfc_write(0, INT_STATUS_REG);
+
+	nfc_info->irq.done = 1;
+	wake_up(&nfc_info->irq.wq);
+
+	return IRQ_HANDLED;
+}
+#endif
+
+/* Get resources from platform: register bank mapping, irqs, etc */
+static int nfc_init_resources(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct resource *resource;
+#ifndef POLLED_XFERS
+	int irq;
+	int res;
+#endif
+
+	/* Register base for controller, ultimately from device tree */
+	resource = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!resource) {
+		dev_err(dev, "No register addresses configured!\n");
+		return -ENOMEM;
+	}
+	nfc_info->regbase = devm_ioremap_resource(dev, resource);
+	if (IS_ERR(nfc_info->regbase))
+		return PTR_ERR(nfc_info->regbase);
+
+	dev_dbg(dev, "Got SFRs at phys %p..%p, mapped to %p\n",
+		 (void *)resource->start, (void *)resource->end,
+		 nfc_info->regbase);
+
+	/* A DMA buffer */
+	nfc_info->dma.buf =
+		dma_alloc_coherent(dev, DMA_BUF_SIZE,
+				   &nfc_info->dma.phys, GFP_KERNEL);
+	if (nfc_info->dma.buf == NULL) {
+		dev_err(dev, "dma_alloc_coherent failed!\n");
+		return -ENOMEM;
+	}
+
+	dev_dbg(dev, "DMA buffer %p at physical %p\n",
+		 nfc_info->dma.buf, (void *)nfc_info->dma.phys);
+
+#ifndef POLLED_XFERS
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		dev_err(dev, "No irq configured\n");
+		return irq;
+	}
+	res = request_irq(irq, nfc_irq, 0, "evatronix-nand", nfc_info);
+	if (res < 0) {
+		dev_err(dev, "request_irq failed\n");
+		return res;
+	}
+	dev_dbg(dev, "Successfully registered IRQ %d\n", irq);
+#endif
+
+	return 0;
+}
+
+/* Write timing setup to controller */
+static void setup_nfc_timing(struct nfc_setup *nfc_setup)
+{
+	nfc_write(nfc_setup->timings.time_seq_0, TIME_SEQ_0_REG);
+	nfc_write(nfc_setup->timings.time_seq_1, TIME_SEQ_1_REG);
+	nfc_write(nfc_setup->timings.timings_asyn, TIMINGS_ASYN_REG);
+	nfc_write(nfc_setup->timings.time_gen_seq_0, TIME_GEN_SEQ_0_REG);
+	nfc_write(nfc_setup->timings.time_gen_seq_1, TIME_GEN_SEQ_1_REG);
+	nfc_write(nfc_setup->timings.time_gen_seq_2, TIME_GEN_SEQ_2_REG);
+	nfc_write(nfc_setup->timings.time_gen_seq_3, TIME_GEN_SEQ_3_REG);
+}
+
+/* Write per-chip specific config to controller */
+static void config_nfc(struct nfc_config *nfc_config, void *ref)
+{
+	static void *saved_ref;
+
+	/* To avoid rewriting these unnecessarily every time, we only do
+	 * it when the ref has changed, or if ref == NULL (=> force).
+	 */
+	if (ref) {
+		if (ref == saved_ref)
+			return;
+		saved_ref = ref; /* only save if non-null */
+	}
+
+	nfc_write(nfc_config->mem_ctrl, MEM_CTRL_REG);
+	nfc_write(nfc_config->control, CONTROL_REG);
+	nfc_write(nfc_config->ecc_ctrl, ECC_CTRL_REG);
+}
+
+
+#ifndef POLLED_XFERS
+/* Set up interrupt and wq, with supplied interrupt mask */
+static void setup_int(uint32_t what)
+{
+	/* Flag waited on by wq */
+	nfc_info->irq.done = 0;
+
+	/* clear interrupt status bits */
+	nfc_write(0, INT_STATUS_REG);
+
+	/* set interrupt mask */
+	nfc_write(what, INT_MASK_REG);
+
+	/* enable global NFC interrupt. Ooooh... */
+	nfc_write(nfc_read(CONTROL_REG) | CONTROL_INT_EN, CONTROL_REG);
+}
+#endif
+
+/* Set up interrupt, send command, then wait for (any bit of) expected state */
+/* Before issuing a command, we could check if the controller is ready.
+ * We can't check INT_STATUS_REG.MEM0_RDY_INT_FL as it is not a status bit,
+ * it is set on an nfc state transition after the completion of for
+ * instance a page program command (so we can use it as a command
+ * completed trigger).
+ * (See NFC Design Spec (rev 1.15) figure 35 for illustration.)
+ * However, we could check STATUS.CTRL_STAT, which should always
+ * be 0 prior to issuing a command, indicating the controller is not
+ * busy, however, this should never be necessary as we always wait for
+ * the controller to finish the previous command before going on with the
+ * next one. If we time out while waiting it means something has gone really
+ * haywire with the controller so it's probably not worth trying to wait
+ * for it to become ready at a later time either.
+ */
+static void command_and_wait(uint32_t nfc_command, uint32_t int_state)
+#ifndef POLLED_XFERS
+{
+	long timeout;
+
+	/* Set up interrupt condition. Here we utilize the fact that the
+	 * bits in INT_STATE are the same as in INT_MASK.
+	 */
+	setup_int(int_state);
+
+	/* Send command */
+	nfc_write(nfc_command, COMMAND_REG);
+
+	/* The timeout should only trigger in abnormal situations, so
+	 * we leave it at one second for now. (nand_base uses 20ms for write
+	 * and 400ms for erase, respectively.)
+	 */
+	/* A special case might be an unconnected flash chip during probe.
+	 * If that causes the timeout to be triggered, we might want to lower
+	 * it, and even make it dependent on the NAND flash command being
+	 * executed.
+	 */
+	timeout = wait_event_timeout(nfc_info->irq.wq, nfc_info->irq.done,
+				     1 * HZ);
+	if (timeout <= 0) {
+		dev_info(nfc_info->dev,
+			 "Request 0x%08x timed out waiting for 0x%08x\n",
+			 nfc_command, int_state);
+	}
+}
+#else /* POLLED_XFERS */
+{
+	int cmd_loops = 0;
+	uint32_t read_status, read_int_status, dma_status;
+
+	/* Clear interrupt status bits */
+	nfc_write(0, INT_STATUS_REG);
+
+	/* Send command */
+	nfc_write(nfc_command, COMMAND_REG);
+
+	/* Wait for command to complete */
+	MTD_TRACE("Waiting for 0x%08x bit(s) to be set in int_status\n",
+		  int_state);
+
+#define MAX_CMD_LOOPS 100000
+	do {
+		cmd_loops++;
+		read_status = nfc_read(STATUS_REG);
+		read_int_status = nfc_read(INT_STATUS_REG);
+		dma_status = nfc_read(DMA_CTRL_REG);
+		MTD_TRACE("Wait for command done: 0x%08x/0x%08x/0x%08x (%d)\n",
+			  read_status, read_int_status, dma_status, cmd_loops);
+	} while (!(read_int_status & int_state) && cmd_loops < MAX_CMD_LOOPS);
+
+	/*
+	 * Ensure that the status is read before any reads to the DMA buffer.
+	 */
+	rmb();
+
+	if (cmd_loops >= MAX_CMD_LOOPS)
+		MTD_TRACE("Int wait for 0x%08x timed out after %d loops: STATUS = 0x%08x, INT_STATUS=0x%08x, DMA_CTRL = 0x%08x, command 0x%08x\n",
+			  int_state, cmd_loops, read_status, read_int_status,
+			  dma_status, nfc_command);
+}
+#endif
+
+/* Initialize DMA, wq and interrupt status for upcoming transfer. */
+static void init_dma(uint64_t addr, int bytes)
+{
+	int dma_trig_level;
+
+	/* DMA control */
+
+	/* Start when COMMAND register written, set burst type/size */
+	nfc_write(DMA_CTRL_DMA_START | DMA_CTRL_DMA_BURST_I_P_4, DMA_CTRL_REG);
+
+	/*
+	 * Ensure that writes to the DMA buffer are done before we tell the
+	 * hardware about it.
+	 */
+	wmb();
+
+	/* DMA address and length */
+#ifdef NFC_DMA64BIT
+	/* The manual says this register does not 'occur' (sic) unless
+	 * 64 bit DMA support is included.
+	 */
+	nfc_write(addr >> 32, DMA_ADDR_H_REG);
+#endif
+	nfc_write(addr, DMA_ADDR_L_REG);
+
+	/* Byte counter */
+	/* Round up to nearest 32-bit word */
+	nfc_write((bytes + 3) & 0xfffffffc, DMA_CNT_REG);
+
+	/* Cap DMA trigger level at FIFO size */
+	dma_trig_level = bytes * 8 / 32; /* 32-bit entities */
+	if (dma_trig_level > DMA_TLVL_MAX)
+		dma_trig_level = DMA_TLVL_MAX;
+	nfc_write(dma_trig_level, DMA_TLVL_REG);
+}
+
+/* Initialize transfer to or from DMA buffer */
+static void init_dmabuf(int bytes)
+{
+	nfc_info->dma.ptr = nfc_info->dma.buf;
+	nfc_info->dma.buf_bytes = nfc_info->dma.bytes_left = bytes;
+}
+
+/* Initialize controller for DATA_REG readout */
+static void init_dreg_read(int bytes)
+{
+	/* Transfer to DATA_REG register */
+	nfc_write(DATA_REG_SIZE_DATA_REG_SIZE(bytes), DATA_REG_SIZE_REG);
+}
+
+/* Set up for ECC if needed */
+static void setup_ecc(struct chip_info *info, int enable_ecc, int column)
+{
+	uint32_t control;
+	struct mtd_info *mtd = nand_to_mtd(&info->chip);
+
+	/* When reading the oob, we never want ECC, when reading the
+	 * main area, it depends.
+	 */
+	control = nfc_read(CONTROL_REG) & ~CONTROL_ECC_EN;
+	if (enable_ecc) {
+		nfc_write(ECC_OFFSET + mtd->writesize +
+			  info->chip.ecc.bytes * column / info->chip.ecc.size,
+			  ECC_OFFSET_REG);
+		control |= CONTROL_ECC_EN;
+	}
+	nfc_write(control, CONTROL_REG);
+}
+
+/* Read from flash using DMA */
+/* Assumes basic setup for DMA has been done previously. */
+/* The MTD framework never reads a complete page (main + oob) in one go
+ * when using HW ECC, so we don't need to support NFC_READ_ALL in this mode.
+ * For SW ECC we read the whole page on one go in ALL mode however.
+ */
+static void read_dma(struct chip_info *info, int page, int column,
+		     enum nfc_read_mode m)
+{
+	int size;
+	uint32_t command;
+	struct mtd_info *mtd = nand_to_mtd(&info->chip);
+
+	switch (m) {
+	case NFC_READ_OOB:
+		size = mtd->oobsize;
+		break;
+	case NFC_READ_ALL:
+		size = mtd->oobsize + mtd->writesize;
+		break;
+	case NFC_READ_STD:
+	case NFC_READ_RAW:
+		/* If the column is set to anything but 0 in this mode
+		 * we're doing a partial page read so we need to decrease
+		 * the size accordingly.
+		 */
+		size = mtd->writesize - column;
+		break;
+	default:
+		BUG();
+	}
+
+	/* Set up ECC depending on mode */
+	setup_ecc(info, m == NFC_READ_STD, column);
+
+	/* Set up DMA and transfer size */
+
+	init_dmabuf(size);
+	init_dma(nfc_info->dma.phys, size);
+	nfc_write(size, DATA_SIZE_REG);
+
+	/* Set up addresses */
+
+	if (m == NFC_READ_OOB)
+		column += mtd->writesize;
+	nfc_write(column, ADDR0_COL_REG);
+	nfc_write(page, ADDR0_ROW_REG);
+
+	/* For devices > 128 MiB we have 5 address cycles and can use a
+	 * standard NFC command sequence. For smaller devices we have
+	 * 4 address cycles and need to use a Generic Command Sequence.
+	 */
+	if (info->chip.chipsize > (128 << 20)) {
+		command = COMMAND_READ_PAGE_DMA_STD;
+	} else {
+		nfc_write(GEN_SEQ_CTRL_READ_PAGE_4CYCLE, GEN_SEQ_CTRL_REG);
+		command = COMMAND_READ_PAGE_DMA_GEN;
+	}
+
+	command_and_wait(command, INT_STATUS_DMA_INT_FL);
+}
+
+/* Write using DMA */
+/* Assumes DMA has been set up previously and buffer contains data. */
+/* Contrary to read, column is set to writesize when writing to oob, by mtd.
+ * oob is set when the caller wants to write oob data along with the main data.
+ */
+static void write_dma(struct chip_info *info, int page, int column,
+		int oob, int raw)
+{
+	int size = info->cmd_cache.write_size;
+	uint32_t command;
+	struct mtd_info *mtd = nand_to_mtd(&info->chip);
+
+	if (column >= mtd->writesize || oob)
+		raw = 1;
+
+	setup_ecc(info, !raw, column);
+
+	/* Dump selected parts of buffer */
+	MTD_TRACE("Write %d bytes: 0x%08x 0x%08x .. 0x%08x\n", size,
+		  ((uint32_t *)(nfc_info->dma.buf))[0],
+		  ((uint32_t *)(nfc_info->dma.buf))[1],
+		  ((uint32_t *)(nfc_info->dma.buf))[size / 4 - 1]);
+
+	/* Set up DMA and transfer size */
+	init_dma(nfc_info->dma.phys, size);
+	nfc_write(size, DATA_SIZE_REG);
+
+	/* Set up addresses */
+
+	nfc_write(column, ADDR0_COL_REG);
+	nfc_write(page, ADDR0_ROW_REG);
+
+	/* For devices > 128 MiB we have 5 address cycles and can use a
+	 * standard NFC command sequence. For smaller devices we have
+	 * 4 address cycles and need to use a Generic Command Sequence.
+	 */
+	if (info->chip.chipsize > (128 << 20)) {
+		command = COMMAND_WRITE_PAGE_DMA_STD;
+	} else {
+		nfc_write(GEN_SEQ_CTRL_WRITE_PAGE_4CYCLE, GEN_SEQ_CTRL_REG);
+		command = COMMAND_WRITE_PAGE_DMA_GEN;
+	}
+
+	command_and_wait(command, INT_STATUS_DMA_INT_FL);
+
+	/* Don't need to check error status (INT_STATUS_REG.STAT_ERR_INT0_FL)
+	 * here, as the NAND subsystem checks device error status anyway after
+	 * the write command.
+	 */
+
+#ifdef CLEAR_DMA_BUF_AFTER_WRITE
+	/* clear buffer so it doesn't contain the written data anymore */
+	memset(nfc_info->dma.buf, 0, DMA_BUF_SIZE);
+#endif
+}
+
+/* Block erase */
+static void block_erase(int page, int cs)
+{
+	/* Set up addresses */
+	nfc_write(page, ADDR0_ROW_REG);
+	MTD_TRACE("Erase block containing page %d\n", page);
+
+	/* Send 3 address cycle block erase command */
+	command_and_wait(COMMAND_BLOCK_ERASE, INT_STATUS_MEM_RDY_INT_FL(cs));
+
+#ifndef POLLED_XFERS
+	MTD_TRACE("Erase block: INT_STATUS 0x%08x\n", nfc_info->irq.int_status);
+#endif
+
+	/* Don't need to check error status (INT_STATUS_REG.STAT_ERR_INT0_FL)
+	 * here, as the NAND subsystem checks device error status anyway after
+	 * the erase command. The error bit in practice probably just indicates
+	 * that the flash didn't pull R/_B low within tWB.
+	 */
+}
+
+/* Check for erased page.
+ * The prerequisite to calling this routine is: page has been read with
+ * HW ECC, which has returned an 'ecc uncorrectable' status, so either the
+ * page does in fact contain too many bitflips for the ECC algorithm to correct
+ * or the page is in fact erased, which results in the all-FF's ECC to
+ * be invalid relative to the all-FF's data on the page.
+ * Since with the Evatronix NFC we don't have access to either the ECC bytes
+ * or the oob area after a HW ECC read, the following algorithm is adopted:
+ * - Count the number of 0's in the main area. If there are more than
+ *   the ECC strength per ECC block we assume the page wasn't in fact erased,
+ *   and return with an error status.
+ * - If the main area appears erased, we still need to determine if the oob is
+ *   also erased, if not, it would appear that the page wasn't in fact erased,
+ *   and what we're looking at is a page of mostly-FF data with an invalid ECC.
+ *   - Thus we need to read the oob, leaving the main area at the start of the
+ *     DMA buffer in case someone actually wants to read the data later (e.g.
+ *     nanddump).
+ *   - We then count the number of non-zero bits in the oob. The accepted
+ *     number of zeros could be determined by figuring the the size ratio
+ *     of the oob compared to an ECC block. For instance, if the oob is 64
+ *     bytes, an ECC block 512 bytes, and the error correction capability
+ *     of 8 bits, then the accepted number of zeros for the oob to be
+ *     considered erased would be 64/512 * 8 = 1. Alternatively we could just
+ *     accept an error correction capability number of zeros.
+ *     If there are less than this threshold number of zero bits, the page
+ *     is considered erased. In this case we return an all-FF page to the user.
+ *     Otherwise, we consider ourselves to have an ECC error on our hands,
+ *     and we return the appropriate error status while at the same time leaving
+ *     original main area data in place, for potential scrutiny by a user space
+ *     application (e.g. nanddump).
+ * Caveat: It could be that there are some cases for which an almost-FF page
+ * yields an almost-FF ECC. If there are fewer than the error correction
+ * capability number of zero bits, we could conclude that such a page would
+ * be erased when in fact it actually contains data with too many bitflips.
+ * Experience will have to determine whether this can actually occur. From
+ * past experiences with ECC codes it seems unlikely that that trivial
+ * data will in fact result in a trivial ECC code. Even the fairly basic
+ * 1-bit error correction capability Hamming code does not on its own return
+ * an all-FF ECC for all-FF data.
+ *
+ * Function returns 1 if the page is in fact (considered) erased, 0 if not.
+ */
+static int check_erased_page(struct mtd_info *mtd, uint8_t *buf, int len)
+{
+	struct chip_info *info = TO_CHIP_INFO(mtd);
+	struct nand_chip *chip = &info->chip;
+
+	int eccsteps =  chip->ecc.steps;
+	int eccsize = chip->ecc.size;
+	int eccstrength = chip->ecc.strength;
+
+	int main_area_zeros = 0;
+
+	int step;
+	uint8_t *bufpos = buf;
+
+	MTD_TRACE("%s: %d byte page, ecc steps %d, size %d, strength %d\n",
+		  __func__, len, eccsteps, eccsize, eccstrength);
+
+	/* Check that main area appears erased. If not, return */
+
+	for (step = 0; step < eccsteps; step++) {
+		int zeros = count_zero_bits(bufpos, eccsize, eccstrength);
+
+		if (zeros > eccstrength)
+			return 0;
+		bufpos += eccsize;
+		main_area_zeros += zeros;
+	}
+
+	/* Ok, main area seems erased. Read oob so we can check it too. */
+
+	/* Note that this will overwrite the DMA buffer with the oob data,
+	 * which is ok since the main area data has already been copied
+	 * to buf earlier.
+	 */
+	read_dma(info, info->cmd_cache.page, info->cmd_cache.column,
+		 NFC_READ_OOB);
+
+	/* We go for the simple approach and accept eccstrength zero bits */
+	if (count_zero_bits(nfc_info->dma.buf, mtd->oobsize, eccstrength) >
+	    eccstrength)
+		return 0;
+
+	MTD_TRACE("%s: Page is erased.%s\n", __func__,
+		  main_area_zeros != 0 ? " Clearing main area to 0xff." : "");
+
+	if (main_area_zeros != 0)
+		memset(buf, 0xff, len);
+
+	return 1;
+}
+
+
+/**** MTD API ****/
+
+/* For cmd_ctrl (and possibly others) we need to do absolutely nothing, but the
+ * pointer is still required to point to a valid function.
+ */
+static void nfc_dummy_cmd_ctrl(struct mtd_info *mtd, int cmd,
+		unsigned int ctrl)
+{
+}
+
+/* Read state of ready pin */
+static int nfc_dev_ready(struct mtd_info *mtd)
+{
+	struct chip_info *info = TO_CHIP_INFO(mtd);
+	struct nfc_config *nfc_config = &info->nfc_config;
+
+	MTD_TRACE("mtd %p\n", mtd);
+
+	return !!(nfc_read(STATUS_REG) & nfc_config->mem_status_mask);
+
+}
+
+/* Read byte from DMA buffer */
+/* Not used directly, only via nfc_read_byte */
+static uint8_t nfc_read_dmabuf_byte(struct mtd_info *mtd)
+{
+	MTD_TRACE("mtd %p\n", mtd);
+	if (nfc_info->dma.bytes_left) {
+		nfc_info->dma.bytes_left--;
+		return *nfc_info->dma.ptr++;
+	} else
+		return 0; /* no data */
+}
+
+/* Read block of data from DMA buffer */
+static void nfc_read_dmabuf(struct mtd_info *mtd, uint8_t *buf, int len)
+{
+	MTD_TRACE("mtd %p, buf %p, len %d\n", mtd, buf, len);
+	if (len > nfc_info->dma.bytes_left) {
+		dev_crit(nfc_info->dev,
+			 "Trying to read %d bytes with %d bytes remaining\n",
+			 len, nfc_info->dma.bytes_left);
+		BUG();
+	}
+	memcpy(buf, nfc_info->dma.ptr, len);
+	nfc_info->dma.ptr += len;
+	nfc_info->dma.bytes_left -= len;
+}
+
+/* Set readout position in DMA buffer. Used for NAND_CMD_RNDOUT. */
+static void nfc_set_dmabuf_read_position(struct mtd_info *mtd, int offset)
+{
+	MTD_TRACE("mtd %p, offset %d\n", mtd, offset);
+	if (offset > nfc_info->dma.buf_bytes) {
+		dev_crit(nfc_info->dev,
+			 "Trying to read outside (%d) DMA buffer (%d bytes)\n",
+			 offset, nfc_info->dma.buf_bytes);
+		/* We could BUG() here but it seems a bit severe. */
+		/* Just set sane value for offset. */
+		offset = nfc_info->dma.buf_bytes;
+	}
+	nfc_info->dma.ptr = nfc_info->dma.buf;
+	nfc_info->dma.ptr += offset;
+	nfc_info->dma.bytes_left = nfc_info->dma.buf_bytes - offset;
+}
+
+/* Write block of data to DMA buffer */
+static void nfc_write_dmabuf(struct mtd_info *mtd, const uint8_t *buf, int len)
+{
+	struct chip_info *info = TO_CHIP_INFO(mtd);
+
+	MTD_TRACE("mtd %p, buf %p, len %d\n", mtd, buf, len);
+	if (len > nfc_info->dma.bytes_left) {
+		dev_crit(nfc_info->dev,
+			 "Trying to write %d bytes with %d bytes remaining\n",
+			 len, nfc_info->dma.bytes_left);
+		BUG();
+	}
+	memcpy(nfc_info->dma.ptr, buf, len);
+	nfc_info->dma.ptr += len;
+	nfc_info->dma.bytes_left -= len;
+	info->cmd_cache.write_size += len; /* calculate total length to write */
+}
+
+/* Read byte from DMA buffer or DATA_REG, depending on previous command. */
+/* Used by MTD for reading ID bytes, and chip status */
+static uint8_t nfc_read_byte(struct mtd_info *mtd)
+{
+	struct chip_info *info = TO_CHIP_INFO(mtd);
+	uint8_t status_value;
+
+	if (info->cmd_cache.command != NAND_CMD_STATUS)
+		return nfc_read_dmabuf_byte(mtd);
+
+	MTD_TRACE("Read status\n");
+
+	/* In order to read status, we need to send a READ_STATUS command
+	 * to the NFC first, in order to get the data into the DATA_REG.
+	 */
+	init_dreg_read(1);
+	/* We want to read all status bits from the device */
+	nfc_write(STATUS_MASK_STATE_MASK(0xff), STATUS_MASK_REG);
+	command_and_wait(COMMAND_READ_STATUS, INT_STATUS_DATA_REG_FL);
+	status_value = nfc_read(DATA_REG_REG) & 0xff;
+	MTD_TRACE("Status 0x%08x\n", status_value);
+	return status_value;
+}
+
+/* Do the dirty work for read_page_foo */
+static int nfc_read_page_mode(struct mtd_info *mtd, struct nand_chip *chip,
+		int offset, int len, uint8_t *buf, int oob_required, int page,
+		enum nfc_read_mode m)
+{
+	struct chip_info *info = TO_CHIP_INFO(mtd);
+	unsigned int max_bitflips;
+	uint32_t ecc_status;
+
+	MTD_TRACE("page %d, col %d, offs %d, size %d\n",
+		  page, info->cmd_cache.column, offset, len);
+
+	if (page != info->cmd_cache.page) {
+		MTD_TRACE("Warning: Read page has different page number than READ0: %d vs. %d\n",
+			  page, info->cmd_cache.page);
+	}
+
+	if (m == NFC_READ_STD) {
+		/* ECC error flags and counters are not cleared automatically
+		 * so we do it here.
+		 * Note that the design spec says nothing about having to
+		 * zero ECC_STAT (although it explicitly says that ECC_CNT
+		 * needs to be zeroed by software), but testing on actual
+		 * hardware reveals that this is in fact the case.
+		 */
+		nfc_write(0, ECC_STAT_REG);
+		nfc_write(0, ECC_CNT_REG);
+	}
+
+	read_dma(info, info->cmd_cache.page, info->cmd_cache.column + offset, m);
+
+	/* This is actually nfc_read_dmabuf */
+	/* We add the offset here because the nand_base expects the data
+	 * to be in the corresponding place in the page buffer, rather than
+	 * at the beginning.
+	 */
+	chip->read_buf(mtd, buf + offset, len);
+
+	if (m == NFC_READ_RAW)
+		return 0;
+
+	/* Get ECC status from controller */
+	ecc_status = nfc_read(ECC_STAT_REG);
+	max_bitflips = nfc_read(ECC_CNT_REG) & ECC_CNT_ERR_LVL_MASK;
+
+	if (ecc_status & ECC_STAT_UNC(info->nfc_config.cs))
+		if (!check_erased_page(mtd, buf, mtd->writesize)) {
+			dev_warn(nfc_info->dev, "Uncorrected errors!\n");
+			mtd->ecc_stats.failed++;
+		}
+
+	/* The following is actually not really correct, as the stats should
+	 * reflect _all_ bitflips, not just the largest one in the latest read.
+	 * We could rectify this by reading chip->ecc.bytes at a time,
+	 * and accumulating the statistics per read, but at least for now
+	 * the additional overhead doesn't seem to warrant the increased
+	 * accuracy of the statistics, since the important figure is the
+	 * max number of bitflips in a single ECC block returned by this
+	 * function.
+	 */
+	mtd->ecc_stats.corrected += max_bitflips;
+
+	MTD_TRACE("ECC read status: %s%s%s%s, correction count %d\n",
+		  ecc_status & ECC_STAT_UNC(info->nfc_config.cs) ?
+			"Uncorrected " : "",
+		  ecc_status & ECC_STAT_ERROR(info->nfc_config.cs)
+			? "Corrected " : "",
+		  ecc_status & ECC_STAT_OVER(info->nfc_config.cs)
+			? "Over limit " : "",
+		  ecc_status & (ECC_STAT_UNC(info->nfc_config.cs) |
+				ECC_STAT_ERROR(info->nfc_config.cs) |
+				ECC_STAT_OVER(info->nfc_config.cs))
+			?  "" : "ok",
+		  max_bitflips);
+
+	/* We shouldn't see oob_required for ECC reads. */
+	if (oob_required) {
+		dev_crit(nfc_info->dev, "Need separate read for the OOB\n");
+		BUG();
+	}
+
+	return max_bitflips;
+}
+
+/* Read page with HW ECC */
+static int nfc_read_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip,
+		uint8_t *buf, int oob_required, int page)
+{
+	MTD_TRACE("page %d, oobreq %d\n", page, oob_required);
+	return nfc_read_page_mode(mtd, chip, 0, mtd->writesize, buf,
+				  oob_required, page, NFC_READ_STD);
+}
+
+/* Read page with no ECC */
+static int nfc_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
+		uint8_t *buf, int oob_required, int page)
+{
+	struct chip_info *info = TO_CHIP_INFO(mtd);
+
+	MTD_TRACE("page %d, oobreq %d\n", page, oob_required);
+	/* Since we're doing a raw read we can safely ignore the return value
+	 * as it is the number of bit flips in ECC mode only.
+	 */
+	nfc_read_page_mode(mtd, chip, 0, mtd->writesize, buf, oob_required,
+			   page, NFC_READ_RAW);
+
+	if (!oob_required)
+		return 0;
+
+	/* Read OOB */
+	read_dma(info, info->cmd_cache.page, info->cmd_cache.column,
+		 NFC_READ_OOB);
+	chip->read_buf(mtd, chip->oob_poi, mtd->oobsize);
+
+	return 0;
+}
+
+/* Write page with HW ECC */
+/* This is the only place where we know we'll be writing w/ ECC */
+static int nfc_write_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip,
+		const uint8_t *buf, int oob_required, int page)
+{
+	struct chip_info *info = TO_CHIP_INFO(mtd);
+
+	MTD_TRACE("oob_required %d\n", oob_required);
+
+	/* The controller can't write data to the oob when ECC is enabled,
+	 * so we set oob_required to 0 here and don't process the oob
+	 * further even if requested. This could happen for instance if
+	 * using nandwrite -o without -n .
+	 */
+	if (oob_required)
+		dev_warn(nfc_info->dev, "Tried to write OOB with ECC!\n");
+	info->cmd_cache.oob_required = 0;
+	info->cmd_cache.write_raw = 0;
+
+	/* A bit silly this, this is actually nfc_write_dmabuf */
+	chip->write_buf(mtd, buf, mtd->writesize);
+
+	return 0;
+}
+
+/* Write page with no ECC */
+/* This is the only place where we know we won't be writing w/ ECC */
+static int nfc_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
+		const uint8_t *buf, int oob_required, int page)
+{
+	struct chip_info *info = TO_CHIP_INFO(mtd);
+
+	MTD_TRACE("oob_required %d\n", oob_required);
+
+	/* We need this for the upcoming PAGEPROG command */
+	info->cmd_cache.oob_required = oob_required;
+	info->cmd_cache.write_raw = 1;
+
+	/* A bit silly this, this is actually nfc_write_dmabuf */
+	chip->write_buf(mtd, buf, mtd->writesize);
+
+	if (oob_required)
+		chip->write_buf(mtd, info->chip.oob_poi, mtd->oobsize);
+
+	return 0;
+}
+
+/* Handle commands from MTD NAND layer */
+static void nfc_command(struct mtd_info *mtd, unsigned int command,
+			     int column, int page_addr)
+{
+	/* We know that an mtd belonging to us is actually only the first
+	 * struct in a multi-struct structure.
+	 */
+	struct chip_info *info = TO_CHIP_INFO(mtd);
+
+	/* Save command so that other parts of the API can figure out
+	 * what's actually going on.
+	 */
+	info->cmd_cache.command = command;
+
+	/* Configure the NFC for the flash chip in question. */
+	config_nfc(&info->nfc_config, info);
+
+	/* Some commands we execute immediately, while some need to be
+	 * deferred until we have all the data needed, i.e. for page read,
+	 * we can't initiate the read until we know if we are going to be
+	 * using raw mode or not.
+	 */
+	switch (command) {
+	case NAND_CMD_READ0:
+		MTD_TRACE("READ0 page %d, column %d, ecc mode %s\n",
+			  page_addr, column,
+			  info->chip.ecc.mode == NAND_ECC_HW ? "hardware" :
+							       "software");
+		if (info->chip.ecc.mode == NAND_ECC_HW) {
+			/* We do not yet know if the caller wants to
+			 * read the page with or without ECC, so we
+			 * just store the page number and main/oob flag
+			 * here.
+			 * (The page number also arrives via the subsequent
+			 * read_page call, so we don't really need to store it).
+			 */
+			info->cmd_cache.page = page_addr;
+			info->cmd_cache.column = column;
+		} else {
+			/* Read the whole page including oob */
+			info->cmd_cache.oob_required = 1;
+			read_dma(info, page_addr, column, NFC_READ_ALL);
+		}
+		break;
+	case NAND_CMD_READOOB:
+		MTD_TRACE("READOOB page %d, column %d\n", page_addr, column);
+		/* In contrast to READ0, where nand_base always calls
+		 * a read_page_foo function before reading the data,
+		 * for READOOB, read_buf is called instead.
+		 * We don't want the actual read in read_buf, so
+		 * we put it here.
+		 */
+		read_dma(info, page_addr, column, NFC_READ_OOB);
+		break;
+	case NAND_CMD_RNDOUT:
+		MTD_TRACE("RNDOUT column %d\n", column);
+		/* Just set read position in buffer of previously performed
+		 * read operation.
+		 */
+		nfc_set_dmabuf_read_position(mtd, column);
+		break;
+	case NAND_CMD_ERASE1:
+		MTD_TRACE("ERASE1 page %d\n", page_addr);
+		/* Just grab page parameter, wait until ERASE2 to do
+		 * something.
+		 */
+		info->cmd_cache.page = page_addr;
+		break;
+	case NAND_CMD_ERASE2:
+		MTD_TRACE("ERASE2 page %d, do it\n", info->cmd_cache.page);
+		/* Off we go! */
+		block_erase(info->cmd_cache.page, info->nfc_config.cs);
+		break;
+	case NAND_CMD_RESET:
+		MTD_TRACE("chip reset\n");
+		/* Clear the FIFOs */
+		nfc_write(FIFO_INIT_FIFO_INIT, FIFO_INIT_REG);
+		command_and_wait(COMMAND_RESET,
+				 INT_STATUS_CMD_END_INT_FL);
+		break;
+	case NAND_CMD_SEQIN:
+		MTD_TRACE("SEQIN column %d, page %d\n", column, page_addr);
+		/* Just grab some parameters, then wait until
+		 * PAGEPROG to do the actual operation.
+		 */
+		info->cmd_cache.page = page_addr;
+		info->cmd_cache.column = column;
+		info->cmd_cache.write_size = 0; /* bumped by nfc_write_dmabuf */
+		/* Prepare DMA buffer for data. We don't yet know
+		 * how much data there is, so set size to max.
+		 */
+		init_dmabuf(DMA_BUF_SIZE);
+		break;
+	case NAND_CMD_PAGEPROG:
+		/* Used for both main area and oob */
+		MTD_TRACE("PAGEPROG page %d, column %d, w/oob %d, raw %d\n",
+			  info->cmd_cache.page, info->cmd_cache.column,
+			  info->cmd_cache.oob_required,
+			  info->cmd_cache.write_raw);
+		write_dma(info, info->cmd_cache.page,
+			  info->cmd_cache.column,
+			  info->cmd_cache.oob_required,
+			  info->cmd_cache.write_raw);
+		break;
+	case NAND_CMD_READID:
+		MTD_TRACE("READID (0x%02x)\n", column);
+
+		/* Read specified ID bytes */
+		/* 0x00 would be NAND_READ_ID_ADDR_STD
+		 * 0x20 would be NAND_READ_ID_ADDR_ONFI
+		 * 0x40 would be NAND_READ_ID_ADDR_JEDEC
+		 * but NAND subsystem knows this and sends us the
+		 * address values directly.
+		 */
+
+		nfc_write(column, ADDR0_COL_REG);
+		nfc_write(0, ADDR0_ROW_REG);
+
+		init_dmabuf(READID_LENGTH);
+		init_dma(nfc_info->dma.phys, READID_LENGTH);
+		nfc_write(READID_LENGTH, DATA_SIZE_REG);
+
+		/* Send read id command */
+		command_and_wait(COMMAND_READ_ID,
+				 INT_STATUS_DMA_INT_FL);
+		break;
+	case NAND_CMD_STATUS:
+		MTD_TRACE("STATUS, defer to later read byte\n");
+		/* Don't do anything now, wait until we need to
+		 * actually read status.
+		 */
+		break;
+	case NAND_CMD_PARAM:
+		MTD_TRACE("PARAM (0x%02x)\n", column);
+
+		nfc_write(column, ADDR0_COL_REG);
+		nfc_write(0, ADDR0_ROW_REG);
+
+		init_dmabuf(NAND_PARAM_SIZE_MAX);
+		init_dma(nfc_info->dma.phys, NAND_PARAM_SIZE_MAX);
+		nfc_write(NAND_PARAM_SIZE_MAX, DATA_SIZE_REG);
+
+		command_and_wait(COMMAND_PARAM,
+				 INT_STATUS_DMA_INT_FL);
+		break;
+	default:
+		MTD_TRACE("Unhandled command 0x%02x (col %d, page addr %d)\n",
+			  command, column, page_addr);
+		break;
+	}
+}
+
+
+/**** Top level probing and device management ****/
+
+/* Verify ECC configuration set by nand_base.c, set defaults if needed */
+static void nfc_verify_ecc_config(struct platform_device *pdev,
+				  struct nand_chip *chip)
+{
+	struct device *dev = &pdev->dev;
+
+	/* ECC parameters */
+	if ((chip->ecc.mode != NAND_ECC_HW &&
+	     chip->ecc.mode != NAND_ECC_SOFT) ||
+	    chip->ecc.algo != NAND_ECC_BCH) {
+		dev_warn(dev, "Unsupported/unset ECC mode, setting HW BCH\n");
+		chip->ecc.mode = NAND_ECC_HW;
+		chip->ecc.algo = NAND_ECC_BCH;
+	}
+
+	/* NFC can handle 2 bits but ECC_BYTES macro can't and it's
+	 * highly unlikely we'd ever need to support 2 bits correction
+	 * in practice, so don't allow that case here.
+	 */
+	if (chip->ecc.strength != 4 && chip->ecc.strength != 8 &&
+	    chip->ecc.strength != 16 && chip->ecc.strength != 24 &&
+	    chip->ecc.strength != 32) {
+		chip->ecc.strength = 8;
+		dev_warn(dev, "Unsupported ECC strength, using default %d\n",
+			 chip->ecc.strength);
+	}
+
+	if (chip->ecc.size != 256 && chip->ecc.size != 512 &&
+	    chip->ecc.size != 1024) {
+		chip->ecc.size = 512;
+		dev_warn(dev, "Unsupported ECC step size, using default %d\n",
+			 chip->ecc.size);
+	}
+}
+
+/* Get proprietary configuration from device tree */
+/* (nand_base.c sets up ECC and BBT parameters for us) */
+static int nfc_get_dt_config(struct platform_device *pdev)
+{
+	struct nfc_setup *nfc_setup = dev_get_platdata(&pdev->dev);
+	struct device *dev = &pdev->dev;
+	struct device_node *np = pdev->dev.of_node;
+	int res, timings;
+
+	if (!np) {
+		dev_err(dev, "Can't retrieve device tree configuration!\n");
+		return -EINVAL;
+	}
+
+	timings = sizeof(nfc_setup->timings) / sizeof(u32);
+	res = of_property_read_u32_array(np, "evatronix,timings",
+					 (u32 *)&nfc_setup->timings, timings);
+	if (res < 0) {
+		dev_warn(dev, "NAND timing setup missing, using defaults\n");
+		/* Default values have been set, but we don't know what
+		 * read_u32_array does if it fails during parsing, so reset
+		 * them here again.
+		 */
+		memcpy(&nfc_setup->timings, &default_mode0_pll_enabled,
+		       sizeof(nfc_setup->timings));
+	}
+
+	res = !!of_get_property(np, "evatronix,use-bank-select", NULL);
+	if (res)
+		nfc_setup->use_bank_select = true;
+
+	res = !!of_get_property(np, "evatronix,rb-wired-and", NULL);
+	if (res)
+		nfc_setup->rb_wired_and = true;
+
+	return 0;
+}
+
+/* Number of ecc bytes needed, helper for ooblayout functions */
+static int nfc_eccbytes(struct mtd_info *mtd)
+{
+	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct device *dev = &mtd->dev;
+	int eccbytes = chip->ecc.bytes * (mtd->writesize / chip->ecc.size);
+
+	if (ECC_OFFSET + eccbytes > mtd->oobsize) {
+		dev_err(dev, "Need %d bytes for ECC and BBM, only have %d bytes in OOB\n",
+			ECC_OFFSET + eccbytes, mtd->oobsize);
+		eccbytes = mtd->oobsize - ECC_OFFSET;
+	}
+
+	return eccbytes;
+}
+
+/* Callbacks for ECC layout */
+
+/* We put the ECC just after the 2-byte bad block marker */
+static int nfc_ooblayout_ecc(struct mtd_info *mtd, int section,
+			     struct mtd_oob_region *oobregion)
+{
+	if (section)
+		return -ERANGE; /* We have only one section */
+
+	oobregion->offset = ECC_OFFSET;
+	oobregion->length = nfc_eccbytes(mtd);
+
+	return 0;
+}
+
+/* Everything after the ECC is considered free. */
+static int nfc_ooblayout_free(struct mtd_info *mtd, int section,
+			      struct mtd_oob_region *oobregion)
+{
+	if (section)
+		return -ERANGE; /* Only one free section */
+
+	oobregion->offset = ECC_OFFSET + nfc_eccbytes(mtd);
+	oobregion->length = mtd->oobsize - oobregion->offset;
+
+	return 0;
+}
+
+static const struct mtd_ooblayout_ops nfc_ooblayout_ops = {
+	.ecc = nfc_ooblayout_ecc,
+	.free = nfc_ooblayout_free,
+};
+
+/* Per-NAND-chip initialization. */
+static __init
+struct mtd_info *nfc_flash_probe(struct platform_device *pdev,
+				 unsigned int bank_no)
+{
+	struct mtd_info *mtd;
+	struct chip_info *this;
+	struct device *dev = &pdev->dev;
+	int pages_per_block, ecc_blksize, ecc_strength;
+	struct nfc_setup *nfc_setup = nfc_info->setup;
+
+	/* Allocate memory for NAND device structure (including mtd) */
+	this = devm_kzalloc(dev, sizeof(struct chip_info), GFP_KERNEL);
+	if (!this)
+		return NULL;
+
+	/* Link the nand data with the mtd structure */
+	mtd = nand_to_mtd(&this->chip);
+	nand_set_flash_node(&this->chip, pdev->dev.of_node);
+
+	/* Set up basic config for NAND controller hardware */
+
+	/* Device control. */
+	if (nfc_setup->use_bank_select) {
+		/* Separate chips regarded as different banks. */
+		this->nfc_config.mem_ctrl =
+			MEM_CTRL_BANK_SEL(bank_no) | MEM_CTRL_MEM0_WR;
+		this->nfc_config.cs = 0;
+	} else {
+		/* Separate chips regarded as different chip selects. */
+		this->nfc_config.mem_ctrl =
+			MEM_CTRL_MEM_CE(bank_no) | MEM_CTRL_MEM0_WR;
+		this->nfc_config.cs = bank_no;
+	}
+
+	if (nfc_setup->rb_wired_and) {
+		/* Ready/busy from all flash chips wired-AND:ed */
+		this->nfc_config.mem_status_mask = STATUS_MEM_ST(0);
+	} else {
+		/* Ready/busy from nand flash as separate per-device signals */
+		this->nfc_config.mem_status_mask = STATUS_MEM_ST(bank_no);
+	}
+
+	/* Our interface to the mtd API */
+	this->chip.cmdfunc = nfc_command;
+	this->chip.cmd_ctrl = nfc_dummy_cmd_ctrl;
+	this->chip.dev_ready = nfc_dev_ready;
+	this->chip.read_byte = nfc_read_byte;
+	this->chip.read_buf = nfc_read_dmabuf;
+	this->chip.write_buf = nfc_write_dmabuf;
+
+	/* Scan to find existence of the device */
+	/* Note that the NFC is not completely set up at this time, but
+	 * that is ok as we only need to identify the device here.
+	 */
+	if (nand_scan_ident(mtd, 1, NULL))
+		return NULL;
+
+	/* nand_scan_ident() sets up general DT properties, including
+	 * NAND_BUSWIDTH_16 which we don't support.
+	 */
+	if (this->chip.options & NAND_BUSWIDTH_16) {
+		dev_err(dev, "16 bit bus mode not supported\n");
+		return NULL;
+	}
+
+	/* Set up rest of config for NAND controller hardware */
+
+	/* Since nand_scan_ident() sets up the ECC config from DT, we
+	 * check its validity here before going on, and set defaults
+	 * (and display a warning) if the values are unacceptable.
+	 */
+	nfc_verify_ecc_config(pdev, &this->chip);
+
+	/* ECC block size and pages per block */
+	pages_per_block = mtd->erasesize / mtd->writesize;
+	ecc_blksize = this->chip.ecc.size;
+	ecc_strength = this->chip.ecc.strength;
+	this->nfc_config.control = CONTROL_ECC_BLOCK_SIZE(ecc_blksize) |
+				   CONTROL_BLOCK_SIZE(pages_per_block);
+
+	/* Set up ECC control and offset of ECC data */
+	/* We don't use the threshold capability of the controller, as we
+	 * let mtd handle that, so set the threshold to same as capability.
+	 */
+	this->nfc_config.ecc_ctrl = ECC_CTRL_ECC_THRESHOLD(ecc_strength) |
+				    ECC_CTRL_ECC_CAP(ecc_strength);
+
+	/* Since we've now completed the configuration, we need to force it to
+	 * be written to the NFC, else the caching in config_nfc will leave
+	 * the nfc_config values written since nand_scan_ident unwritten.
+	 */
+	config_nfc(&this->nfc_config, NULL);
+
+	/* ECC setup */
+
+	/* ECC API */
+	/* Override the following functions when using hardware ECC,
+	 * otherwise we use the defaults set up by nand_base.
+	 */
+	if (this->chip.ecc.mode == NAND_ECC_HW) {
+		this->chip.ecc.read_page = nfc_read_page_hwecc;
+		this->chip.ecc.read_page_raw = nfc_read_page_raw;
+		this->chip.ecc.write_page = nfc_write_page_hwecc;
+		this->chip.ecc.write_page_raw = nfc_write_page_raw;
+		this->chip.options |= NAND_NO_SUBPAGE_WRITE;
+	}
+
+	/* Not sure if these really need to be set for HW ECC; but if
+	 * nothing else we use the values for our lower level driver
+	 * to have a common point where it is all set up.
+	 */
+	this->chip.ecc.bytes = ECC_BYTES(ecc_strength, ecc_blksize);
+	mtd_set_ooblayout(mtd, &nfc_ooblayout_ops);
+
+	/* We set the bitflip_threshold at 75% of the error correction
+	 * level to get some margin in case bitflips happen in parts of the
+	 * flash that we don't read that often.
+	 */
+	/* We add 1 so that an ECC strength of 1 gives us a threshold of 1;
+	 * rather academic though, as we only support BCH anyway...
+	 */
+	mtd->bitflip_threshold = (ecc_strength + 1) * 3 / 4;
+
+	if (this->chip.bbt_options & NAND_BBT_USE_FLASH)
+		/* Enable the use of a flash based bad block table.
+		 * Since the OOB is not ECC protected we don't put BBT stuff
+		 * there. We also don't mark user-detected badblocks as bad in
+		 * their oob, only in the BBT, to avoid potential chip problems
+		 * when attempting to write bad blocks (writing to bad blocks
+		 * is not recommended according to flash manufacturers).
+		 */
+		this->chip.bbt_options |= NAND_BBT_NO_OOB | NAND_BBT_NO_OOB_BBM;
+
+	this->chip.controller = nfc_info->controller;
+
+	/* Finalize NAND scan, including BBT if requested */
+	if (nand_scan_tail(mtd))
+		return NULL;
+
+	mtd->dev.parent = &pdev->dev;
+
+	return mtd;
+}
+
+/* Main probe function. Called to probe and set up device. */
+static int __init nfc_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct mtd_info *main_mtd;
+	struct nfc_setup *nfc_setup;
+	struct nand_hw_control *controller;
+	int err = 0;
+
+	dev_info(dev, "Initializing Evatronix NANDFLASH-CTRL driver\n");
+
+	/* nfc_info is where we keep runtime information about the NFC */
+	nfc_info = devm_kzalloc(dev, sizeof(*nfc_info), GFP_KERNEL);
+	if (!nfc_info)
+		return -ENOMEM;
+
+	nfc_info->dev = dev;
+	spin_lock_init(&nfc_info->lock);
+
+	/* Set up a controller struct to act as shared lock for all devices */
+	controller = devm_kzalloc(dev, sizeof(*controller), GFP_KERNEL);
+	if (!controller)
+		return -ENOMEM;
+
+	spin_lock_init(&controller->lock);
+	init_waitqueue_head(&controller->wq);
+	nfc_info->controller = controller;
+
+	/* nfc_setup is where we keep settings from DT, in digested form */
+	nfc_setup = devm_kzalloc(dev, sizeof(*nfc_setup), GFP_KERNEL);
+	if (!nfc_setup)
+		return -ENOMEM;
+
+	pdev->dev.platform_data = nfc_setup;
+	nfc_info->setup = nfc_setup;
+
+	memcpy(&nfc_setup->timings, &default_mode0_pll_enabled,
+	       sizeof(nfc_setup->timings));
+
+	/* Get config from device tree. */
+	err = nfc_get_dt_config(pdev);
+	if (err)
+		return err;
+
+	/* Initialize interrupts and DMA etc. */
+	err = nfc_init_resources(pdev);
+	if (err)
+		return err;
+
+	setup_nfc_timing(nfc_setup);
+
+#ifndef POLLED_XFERS
+	init_waitqueue_head(&nfc_info->irq.wq);
+#endif
+
+	main_mtd = nfc_flash_probe(pdev, 0);
+	if (!main_mtd)
+		return -ENXIO;
+
+	dev_info(dev, "ECC using %s mode with strength %i and block size %i.\n",
+		 mtd_to_nand(main_mtd)->ecc.mode == NAND_ECC_HW ? "hardware" :
+								  "software",
+		 mtd_to_nand(main_mtd)->ecc.strength,
+		 mtd_to_nand(main_mtd)->ecc.size);
+
+	/* Map mtd partitions from DT */
+	err = mtd_device_register(main_mtd, NULL, 0);
+
+	return err;
+}
+
+static const struct of_device_id nfc_id_table[] = {
+	{ .compatible = "evatronix,nandflash-ctrl" },
+	{} /* sentinel */
+};
+MODULE_DEVICE_TABLE(of, nfc_id_table);
+
+static struct platform_driver nfc_driver = {
+	.driver = {
+		.name   = "evatronix-nand",
+		.of_match_table = of_match_ptr(nfc_id_table),
+	},
+	.probe = nfc_probe,
+};
+
+module_platform_driver(nfc_driver);
+
+MODULE_AUTHOR("Ricard Wanderlof <ricardw@axis.com>");
+MODULE_DESCRIPTION("Evatronix NANDFLASH-CTRL driver");
+MODULE_LICENSE("GPL v2");
-- 
1.7.10.4

-- 
Ricard Wolf Wanderlöf                           ricardw(at)axis.com
Axis Communications AB, Lund, Sweden            www.axis.com
Phone +46 46 272 2016                           Fax +46 46 13 61 30

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH 3/4] mtd: nand: Add support for Evatronix NANDFLASH-CTRL
  2016-06-02  7:48 [PATCH 3/4] mtd: nand: Add support for Evatronix NANDFLASH-CTRL Ricard Wanderlof
@ 2016-06-03 14:59 ` Boris Brezillon
  2016-06-09  8:19   ` Ricard Wanderlof
  0 siblings, 1 reply; 18+ messages in thread
From: Boris Brezillon @ 2016-06-03 14:59 UTC (permalink / raw)
  To: Ricard Wanderlof
  Cc: Brian Norris, David Woodhouse, Benoit Cousson, Tony Lindgren,
	devicetree, Linux mtd, linux-kernel, Oleksij Rempel

On Thu, 2 Jun 2016 09:48:32 +0200
Ricard Wanderlof <ricard.wanderlof@axis.com> wrote:

> Driver for the Evatronix NANDFLASH-CTRL NAND flash controller IP. This
> controller is used in the Axis ARTPEC-6 SoC.
> 
> The driver supports BCH ECC using the controller's hardware, but there is
> also an option to use software BCH ECC. However, the ECC layouts are not
> compatible so it's not possible to mix them. The main advantage to using
> software ECC is that there are more OOB bytes free, as the hardware is
> slightly wasteful on OOB space.
> 
> BCH ECC from 4 to 32 bits over 256, 512 or 1024 byte ECC blocks is supported.
> 
> Only large-page flash chips are supported, using 4 or 5 address cycles.
> 
> The driver has been extensively tested using hardware ECC on 2 Mbit flash chips,
> with 8 bit ECC over 512 bytes ECC blocks.
> 
> Signed-off-by: Ricard Wanderlof <ricardw@axis.com>
> ---
>  drivers/mtd/nand/Kconfig          |    6 +
>  drivers/mtd/nand/Makefile         |    1 +
>  drivers/mtd/nand/evatronix_nand.c | 1909 +++++++++++++++++++++++++++++++++++++
>  3 files changed, 1916 insertions(+)
>  create mode 100644 drivers/mtd/nand/evatronix_nand.c

Please run checkpatch.pl and fix all the ERRORS and WARNINGS.

> 
> diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
> index f05e0e9..30fba73 100644
> --- a/drivers/mtd/nand/Kconfig
> +++ b/drivers/mtd/nand/Kconfig
> @@ -295,6 +295,12 @@ config MTD_NAND_DOCG4
>  	  by the block containing the saftl partition table.  This is probably
>  	  typical.
>  
> +config MTD_NAND_EVATRONIX
> +	tristate "Enable Evatronix NANDFLASH-CTRL driver"
> +	help
> +	  NAND hardware driver for Evatronix NANDFLASH-CTRL
> +	  NAND flash controller.
> +
>  config MTD_NAND_SHARPSL
>  	tristate "Support for NAND Flash on Sharp SL Series (C7xx + others)"
>  	depends on ARCH_PXA
> diff --git a/drivers/mtd/nand/Makefile b/drivers/mtd/nand/Makefile
> index f553353..ac89b12 100644
> --- a/drivers/mtd/nand/Makefile
> +++ b/drivers/mtd/nand/Makefile
> @@ -19,6 +19,7 @@ obj-$(CONFIG_MTD_NAND_S3C2410)		+= s3c2410.o
>  obj-$(CONFIG_MTD_NAND_DAVINCI)		+= davinci_nand.o
>  obj-$(CONFIG_MTD_NAND_DISKONCHIP)	+= diskonchip.o
>  obj-$(CONFIG_MTD_NAND_DOCG4)		+= docg4.o
> +obj-$(CONFIG_MTD_NAND_EVATRONIX)	+= evatronix_nand.o
>  obj-$(CONFIG_MTD_NAND_FSMC)		+= fsmc_nand.o
>  obj-$(CONFIG_MTD_NAND_SHARPSL)		+= sharpsl.o
>  obj-$(CONFIG_MTD_NAND_NANDSIM)		+= nandsim.o
> diff --git a/drivers/mtd/nand/evatronix_nand.c b/drivers/mtd/nand/evatronix_nand.c
> new file mode 100644
> index 0000000..94eb582
> --- /dev/null
> +++ b/drivers/mtd/nand/evatronix_nand.c
> @@ -0,0 +1,1909 @@
> +/*
> + * evatronix_nand.c - NAND Flash Driver for Evatronix NANDFLASH-CTRL
> + * NAND Flash Controller IP.
> + *
> + * Intended to handle one NFC, with up to two connected NAND flash chips,
> + * one per bank.
> + *
> + * This implementation has been designed against Rev 1.15 and Rev 1.16 of the
> + * NANDFLASH-CTRL Design Specification.
> + * Note that Rev 1.15 specifies up to 8 chip selects, whereas Rev 1.16
> + * only specifies one. We keep the definitions for the multiple chip
> + * selects though for future reference.
> + *
> + * The corresponding IP version is NANDFLASH-CTRL-DES-6V09H02RE08 .
> + *
> + * Copyright (c) 2016 Axis Communication AB, Lund, Sweden.
> + * Portions Copyright (c) 2010 ST Microelectronics
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + */
> +
> +#include <asm/dma.h>
> +#include <linux/bitops.h> /* for ffs() */
> +#include <linux/io.h>
> +#include <linux/dma-mapping.h>
> +#include <linux/err.h>
> +#include <linux/interrupt.h>
> +#include <linux/module.h>
> +#include <linux/platform_device.h>
> +#include <linux/of.h>
> +#include <linux/slab.h>
> +#include <linux/mtd/mtd.h>
> +#include <linux/mtd/nand.h>
> +#include <linux/mtd/concat.h>
> +#include <linux/mtd/partitions.h>
> +#include <linux/version.h>

You seem to include a lot of things, and even asm headers. Please make
sure you really need them.

> +
> +/* Driver configuration */
> +
> +/* Some of this could potentially be moved to DT, but it represents stuff
> + * that is either untested, only used for debugging, or things we really
> + * don't want anyone to change, so we keep it here until a clear use case
> + * emerges.
> + */
> +
> +#undef NFC_DMA64BIT /* NFC hardware support for 64-bit DMA transfers */
> +
> +#undef POLLED_XFERS /* Use polled rather than interrupt based transfers */
> +
> +#undef CLEAR_DMA_BUF_AFTER_WRITE /* Useful for debugging */

Then simply drop the code in those sections and add it back when it's
been tested.

> +
> +/* DMA buffer for page transfers. */
> +#define DMA_BUF_SIZE (8192 + 640) /* main + spare for 8k page flash */

This should clearly be dynamic.

> +
> +/* # bytes into the OOB we put our ECC */
> +#define ECC_OFFSET 2
> +
> +/* Number of bytes that we read using READID command.
> + * When reading IDs the IP requires us set up the number of bytes to read
> + * prior to executing the operation, whereas the NAND subsystem would rather
> + * like us to be able to read one byte at a time from the chip. So we fake
> + * this by reading a set number of ID bytes, and then let the NAND subsystem
> + * read from our DMA buffer.
> + */
> +#define READID_LENGTH 8
> +
> +/* Debugging */
> +
> +#define MTD_TRACE(FORMAT, ...) pr_debug("%s: " FORMAT, __func__, ## __VA_ARGS__)

Hm, I'm not a big fan of those custom pr_debug() macros, but if you
really wan to keep it you shouldn't prefix it with MTD_.

Reading at the code I see a lot of MTD_TRACE() calls, while I'm not
against debug traces, it seems to me that you've kept traces you used
while developing/debugging your implementation. Can you clean it up and
keep only the relevant ones.

> +
> +/* Register offsets for Evatronix NANDFLASH-CTRL IP */
> +
> +/* Register field shift values and masks are interespersed as it makes
> + * them easier to locate.
> + *
> + * We use shift values rather than direct masks (e.g. 0x0000d000), as the
> + * hardware manual lists the bit number, making the definitions below
> + * easier to verify against the manual.
> + *
> + * All (known) registers are here, but we only put in the bit fields
> + * for the fields we need.
> + *
> + * We try to be consistent regarding _SIZE/_MASK/_value macros so as to
> + * get a consistent layout here, except for trivial cases where there is
> + * only a single bit or field in a register at bit offset 0.
> + */
> +
> +#define COMMAND_REG		0x00
> +/* The masks reflect the input data to the MAKE_COMMAND macro, rather than
> + * the bits in the register itself. These macros are not intended to be
> + * used by the user, who should use the MAKE_COMMAND et al macros.
> + */
> +#define _CMD_SEQ_SHIFT			0
> +#define _INPUT_SEL_SHIFT		6
> +#define _DATA_SEL_SHIFT			7
> +#define _CMD_0_SHIFT			8
> +#define _CMD_1_3_SHIFT			16
> +#define _CMD_2_SHIFT			24
> +
> +#define _CMD_SEQ_MASK			0x3f
> +#define _INPUT_SEL_MASK			1
> +#define _DATA_SEL_MASK			1
> +#define _CMD_MASK			0xff /* for all CMD_foo */
> +
> +#define MAKE_COMMAND(CMD_SEQ, INPUT_SEL, DATA_SEL, CMD_0, CMD_1_3, CMD_2) \
> +	((((CMD_SEQ)	& _CMD_SEQ_MASK)	<< _CMD_SEQ_SHIFT)	| \
> +	 (((INPUT_SEL)	& _INPUT_SEL_MASK)	<< _INPUT_SEL_SHIFT)	| \
> +	 (((DATA_SEL)	& _DATA_SEL_MASK)	<< _DATA_SEL_SHIFT)	| \
> +	 (((CMD_0)	& _CMD_MASK)		<< _CMD_0_SHIFT)	| \
> +	 (((CMD_1_3)	& _CMD_MASK)		<< _CMD_1_3_SHIFT)	| \
> +	 (((CMD_2)	& _CMD_MASK)		<< _CMD_2_SHIFT))
> +
> +#define INPUT_SEL_SIU			0
> +#define INPUT_SEL_DMA			1
> +#define DATA_SEL_FIFO			0
> +#define DATA_SEL_DATA_REG		1
> +
> +#define CONTROL_REG		0x04
> +#define CONTROL_BLOCK_SIZE_32		(0 << 6)
> +#define CONTROL_BLOCK_SIZE_64		(1 << 6)
> +#define CONTROL_BLOCK_SIZE_128		(2 << 6)
> +#define CONTROL_BLOCK_SIZE_256		(3 << 6)
> +#define CONTROL_BLOCK_SIZE(SIZE)	((ffs(SIZE) - 6) << 6)
> +#define CONTROL_ECC_EN			(1 << 5)
> +#define CONTROL_INT_EN			(1 << 4)
> +#define CONTROL_ECC_BLOCK_SIZE_256	(0 << 1)
> +#define CONTROL_ECC_BLOCK_SIZE_512	(1 << 1)
> +#define CONTROL_ECC_BLOCK_SIZE_1024	(2 << 1)
> +#define CONTROL_ECC_BLOCK_SIZE(SIZE)	((ffs(SIZE) - 9) << 1)
> +#define STATUS_REG		0x08
> +#define STATUS_MEM_ST(CS)		(1 << (CS))
> +#define STATUS_CTRL_STAT		(1 << 8)
> +#define STATUS_MASK_REG		0x0C
> +#define STATE_MASK_SHIFT		0
> +#define STATUS_MASK_STATE_MASK(MASK)	(((MASK) & 0xff) << STATE_MASK_SHIFT)
> +#define ERROR_MASK_SHIFT		8
> +#define STATUS_MASK_ERROR_MASK(MASK)	(((MASK) & 0xff) << ERROR_MASK_SHIFT)
> +#define INT_MASK_REG		0x10
> +#define INT_MASK_ECC_INT_EN(CS)		(1 << (24 + (CS)))
> +#define INT_MASK_STAT_ERR_INT_EN(CS)	(1 << (16 + (CS)))
> +#define INT_MASK_MEM_RDY_INT_EN(CS)	(1 << (8 + (CS)))
> +#define INT_MASK_DMA_INT_EN		(1 << 3)
> +#define INT_MASK_DATA_REG_EN		(1 << 2)
> +#define INT_MASK_CMD_END_INT_EN		(1 << 1)
> +#define INT_STATUS_REG		0x14
> +#define INT_STATUS_ECC_INT_FL(CS)	(1 << (24 + (CS)))
> +#define INT_STATUS_STAT_ERR_INT_FL(CS)	(1 << (16 + (CS)))
> +#define INT_STATUS_MEM_RDY_INT_FL(CS)	(1 << (8 + (CS)))
> +#define INT_STATUS_DMA_INT_FL		(1 << 3)
> +#define INT_STATUS_DATA_REG_FL		(1 << 2)
> +#define INT_STATUS_CMD_END_INT_FL	(1 << 1)
> +#define ECC_CTRL_REG		0x18
> +#define ECC_CTRL_ECC_CAP_2		(0 << 0)
> +#define ECC_CTRL_ECC_CAP_4		(1 << 0)
> +#define ECC_CTRL_ECC_CAP_8		(2 << 0)
> +#define ECC_CTRL_ECC_CAP_16		(3 << 0)
> +#define ECC_CTRL_ECC_CAP_24		(4 << 0)
> +#define ECC_CTRL_ECC_CAP_32		(5 << 0)
> +#define ECC_CTRL_ECC_CAP(B)		((B) < 24 ? ffs(B) - 2 : (B) / 6)
> +/* # ECC corrections that are acceptable during read before setting OVER flag */
> +#define ECC_CTRL_ECC_THRESHOLD(VAL)	(((VAL) & 0x3f) << 8)
> +#define ECC_OFFSET_REG		0x1C
> +#define ECC_STAT_REG		0x20
> +/* Correctable error flag(s) */
> +#define ECC_STAT_ERROR(CS)		(1 << (0 + (CS)))
> +/* Uncorrectable error flag(s) */
> +#define ECC_STAT_UNC(CS)		(1 << (8 + (CS)))
> +/* Acceptable errors level overflow flag(s) */
> +#define ECC_STAT_OVER(CS)		(1 << (16 + (CS)))
> +#define ADDR0_COL_REG		0x24
> +#define ADDR0_ROW_REG		0x28
> +#define ADDR1_COL_REG		0x2C
> +#define ADDR1_ROW_REG		0x30
> +#define PROTECT_REG		0x34
> +#define FIFO_DATA_REG		0x38
> +#define DATA_REG_REG		0x3C
> +#define DATA_REG_SIZE_REG	0x40
> +#define DATA_REG_SIZE_DATA_REG_SIZE(SIZE) (((SIZE) - 1) & 3)
> +#define DEV0_PTR_REG		0x44
> +#define DEV1_PTR_REG		0x48
> +#define DEV2_PTR_REG		0x4C
> +#define DEV3_PTR_REG		0x50
> +#define DEV4_PTR_REG		0x54
> +#define DEV5_PTR_REG		0x58
> +#define DEV6_PTR_REG		0x5C
> +#define DEV7_PTR_REG		0x60
> +#define DMA_ADDR_L_REG		0x64
> +#define DMA_ADDR_H_REG		0x68
> +#define DMA_CNT_REG		0x6C
> +#define DMA_CTRL_REG		0x70
> +#define DMA_CTRL_DMA_START		(1 << 7) /* start on command */
> +#define DMA_CTRL_DMA_MODE_SG		(1 << 5) /* scatter/gather mode */
> +#define DMA_CTRL_DMA_BURST_I_P_4	(0 << 2) /* incr. precise burst */
> +#define DMA_CTRL_DMA_BURST_S_P_16	(1 << 2) /* stream precise burst */
> +#define DMA_CTRL_DMA_BURST_SINGLE	(2 << 2) /* single transfer */
> +#define DMA_CTRL_DMA_BURST_UNSPEC	(3 << 2) /* burst of unspec. length */
> +#define DMA_CTRL_DMA_BURST_I_P_8	(4 << 2) /* incr. precise burst */
> +#define DMA_CTRL_DMA_BURST_I_P_16	(5 << 2) /* incr. precise burst */
> +#define DMA_CTRL_ERR_FLAG		(1 << 1) /* read only */
> +#define DMA_CTRL_DMA_READY		(1 << 0) /* read only */
> +#define BBM_CTRL_REG		0x74
> +#define MEM_CTRL_REG		0x80
> +#define MEM_CTRL_MEM_CE(CE)		(((CE) & 7) << 0)
> +#define MEM_CTRL_BANK_SEL(BANK)		(((BANK) & 7) << 16)
> +#define MEM_CTRL_MEM0_WR	BIT(8)
> +#define DATA_SIZE_REG		0x84
> +#define TIMINGS_ASYN_REG	0x88
> +#define TIMINGS_SYN_REG		0x8C
> +#define TIME_SEQ_0_REG		0x90
> +#define TIME_SEQ_1_REG		0x94
> +#define TIME_GEN_SEQ_0_REG	0x98
> +#define TIME_GEN_SEQ_1_REG	0x9C
> +#define TIME_GEN_SEQ_2_REG	0xA0
> +#define FIFO_INIT_REG		0xB0
> +#define FIFO_INIT_FIFO_INIT			1 /* Flush FIFO */
> +#define FIFO_STATE_REG		0xB4
> +#define FIFO_STATE_DF_W_EMPTY		(1 << 7)
> +#define FIFO_STATE_DF_R_FULL		(1 << 6)
> +#define FIFO_STATE_CF_ACCPT_W		(1 << 5)
> +#define FIFO_STATE_CF_ACCPT_R		(1 << 4)
> +#define FIFO_STATE_CF_FULL		(1 << 3)
> +#define FIFO_STATE_CF_EMPTY		(1 << 2)
> +#define FIFO_STATE_DF_W_FULL		(1 << 1)
> +#define FIFO_STATE_DF_R_EMPTY		(1 << 0)
> +#define GEN_SEQ_CTRL_REG	0xB8		/* aka GENERIC_SEQ_CTRL */
> +#define _CMD0_EN_SHIFT			0
> +#define _CMD1_EN_SHIFT			1
> +#define _CMD2_EN_SHIFT			2
> +#define _CMD3_EN_SHIFT			3
> +#define _COL_A0_SHIFT			4
> +#define _COL_A1_SHIFT			6
> +#define _ROW_A0_SHIFT			8
> +#define _ROW_A1_SHIFT			10
> +#define _DATA_EN_SHIFT			12
> +#define _DELAY_EN_SHIFT			13
> +#define _IMD_SEQ_SHIFT			15
> +#define _CMD3_SHIFT			16
> +#define ECC_CNT_REG		0x14C
> +#define ECC_CNT_ERR_LVL_MASK		0x3F
> +
> +#define _CMD0_EN_MASK			1
> +#define _CMD1_EN_MASK			1
> +#define _CMD2_EN_MASK			1
> +#define _CMD3_EN_MASK			1
> +#define _COL_A0_MASK			3
> +#define _COL_A1_MASK			3
> +#define _ROW_A0_MASK			3
> +#define _ROW_A1_MASK			3
> +#define _DATA_EN_MASK			1
> +#define _DELAY_EN_MASK			3
> +#define _IMD_SEQ_MASK			1
> +#define _CMD3_MASK			0xff
> +
> +/* DELAY_EN field values, non-shifted */
> +#define _BUSY_NONE			0
> +#define _BUSY_0				1
> +#define _BUSY_1				2
> +
> +/* Slightly confusingly, the DELAYx_EN fields enable BUSY phases. */
> +#define MAKE_GEN_CMD(CMD0_EN, CMD1_EN, CMD2_EN, CMD3_EN, \
> +		     COL_A0, ROW_A0, COL_A1, ROW_A1, \
> +		     DATA_EN, BUSY_EN, IMMEDIATE_SEQ, CMD3) \
> +	((((CMD0_EN)	& _CMD0_EN_MASK)	<< _CMD0_EN_SHIFT)	| \
> +	 (((CMD1_EN)	& _CMD1_EN_MASK)	<< _CMD1_EN_SHIFT)	| \
> +	 (((CMD2_EN)	& _CMD2_EN_MASK)	<< _CMD2_EN_SHIFT)	| \
> +	 (((CMD3_EN)	& _CMD3_EN_MASK)	<< _CMD3_EN_SHIFT)	| \
> +	 (((COL_A0)	& _COL_A0_MASK)		<< _COL_A0_SHIFT)	| \
> +	 (((COL_A1)	& _COL_A1_MASK)		<< _COL_A1_SHIFT)	| \
> +	 (((ROW_A0)	& _ROW_A0_MASK)		<< _ROW_A0_SHIFT)	| \
> +	 (((ROW_A1)	& _ROW_A1_MASK)		<< _ROW_A1_SHIFT)	| \
> +	 (((DATA_EN)	& _DATA_EN_MASK)	<< _DATA_EN_SHIFT)	| \
> +	 (((BUSY_EN)	& _DELAY_EN_MASK)	<< _DELAY_EN_SHIFT)	| \
> +	 (((IMMEDIATE_SEQ) & _IMD_SEQ_MASK)	<< _IMD_SEQ_SHIFT)	| \
> +	 (((CMD3)	& _CMD3_MASK)		<< _CMD3_SHIFT))
> +
> +/* The sequence encodings are not trivial. The ones we use are listed here. */
> +#define _SEQ_0			0x00 /* send one cmd, then wait for ready */
> +#define _SEQ_1			0x21 /* send one cmd, one addr, fetch data */
> +#define _SEQ_2			0x22 /* send one cmd, one addr, fetch data */
> +#define _SEQ_4			0x24 /* single cycle write then read */
> +#define _SEQ_10			0x2A /* read page */
> +#define _SEQ_12			0x0C /* write page, don't wait for R/B */
> +#define _SEQ_18			0x32 /* read page using general cycle */
> +#define _SEQ_19			0x13 /* write page using general cycle */
> +#define _SEQ_14			0x0E /* 3 address cycles, for block erase */
> +
> +#define MLUN_REG		0xBC
> +#define DEV0_SIZE_REG		0xC0
> +#define DEV1_SIZE_REG		0xC4
> +#define DEV2_SIZE_REG		0xC8
> +#define DEV3_SIZE_REG		0xCC
> +#define DEV4_SIZE_REG		0xD0
> +#define DEV5_SIZE_REG		0xD4
> +#define DEV6_SIZE_REG		0xD8
> +#define DEV7_SIZE_REG		0xDC
> +#define SS_CCNT0_REG		0xE0
> +#define SS_CCNT1_REG		0xE4
> +#define SS_SCNT_REG		0xE8
> +#define SS_ADDR_DEV_CTRL_REG	0xEC
> +#define SS_CMD0_REG		0xF0
> +#define SS_CMD1_REG		0xF4
> +#define SS_CMD2_REG		0xF8
> +#define SS_CMD3_REG		0xFC
> +#define SS_ADDR_REG		0x100
> +#define SS_MSEL_REG		0x104
> +#define SS_REQ_REG		0x108
> +#define SS_BRK_REG		0x10C
> +#define DMA_TLVL_REG		0x114
> +#define DMA_TLVL_MAX		0xFF
> +#define AES_CTRL_REG		0x118
> +#define AES_DATAW_REG		0x11C
> +#define AES_SVECT_REG		0x120
> +#define CMD_MARK_REG		0x124
> +#define LUN_STATUS_0_REG	0x128
> +#define LUN_STATUS_1_REG	0x12C
> +#define TIMINGS_TOGGLE_REG	0x130
> +#define TIME_GEN_SEQ_3_REG	0x134
> +#define SQS_DELAY_REG		0x138
> +#define CNE_MASK_REG		0x13C
> +#define CNE_VAL_REG		0x140
> +#define CNA_CTRL_REG		0x144
> +#define INTERNAL_STATUS_REG	0x148
> +#define ECC_CNT_REG		0x14C
> +#define PARAM_REG_REG		0x150
> +
> +/* NAND flash command generation */
> +
> +/* NAND flash command codes */
> +#define NAND_RESET		0xff
> +#define NAND_READ_STATUS	0x70
> +#define NAND_READ_ID		0x90
> +#define NAND_READ_ID_ADDR_STD	0x00	/* address written to ADDR0_COL */
> +#define NAND_READ_ID_ADDR_ONFI	0x20	/* address written to ADDR0_COL */
> +#define NAND_READ_ID_ADDR_JEDEC	0x40	/* address written to ADDR0_COL */
> +#define NAND_PARAM		0xEC
> +#define NAND_PARAM_SIZE_MAX		768 /* bytes */
> +#define NAND_PAGE_READ		0x00
> +#define NAND_PAGE_READ_END	0x30
> +#define NAND_BLOCK_ERASE	0x60
> +#define NAND_BLOCK_ERASE_END	0xd0
> +#define NAND_PAGE_WRITE		0x80
> +#define NAND_PAGE_WRITE_END	0x10
> +
> +#define _DONT_CARE 0x00 /* When we don't have anything better to say */
> +
> +
> +/* Assembled values for putting into COMMAND register */
> +
> +/* Reset NAND flash */
> +
> +/* Uses SEQ_0: non-directional sequence, single command, wait for ready */
> +#define COMMAND_RESET \
> +	MAKE_COMMAND(_SEQ_0, INPUT_SEL_SIU, DATA_SEL_FIFO, \
> +		NAND_RESET, _DONT_CARE, _DONT_CARE)
> +
> +/* Read status */
> +
> +/* Uses SEQ_4: single command, then read data via DATA_REG */
> +#define COMMAND_READ_STATUS \
> +	MAKE_COMMAND(_SEQ_4, INPUT_SEL_SIU, DATA_SEL_DATA_REG, \
> +		NAND_READ_STATUS, _DONT_CARE, _DONT_CARE)
> +
> +/* Read ID */
> +
> +/* Uses SEQ_1: single command, ADDR0_COL, then read data via FIFO */
> +/* ADDR0_COL is set to NAND_READ_ID_ADDR_STD for non-ONFi, and
> + * NAND_READ_ID_ADDR_ONFI for ONFi.
> + * The controller reads 5 bytes in the non-ONFi case, and 4 bytes in the
> + * ONFi case, so the data reception (DMA or FIFO_REG) needs to be set up
> + * accordingly.
> + */
> +#define COMMAND_READ_ID \
> +	MAKE_COMMAND(_SEQ_1, INPUT_SEL_DMA, DATA_SEL_FIFO, \
> +		NAND_READ_ID, _DONT_CARE, _DONT_CARE)
> +
> +#define COMMAND_PARAM \
> +	MAKE_COMMAND(_SEQ_2, INPUT_SEL_DMA, DATA_SEL_FIFO, \
> +		NAND_PARAM, _DONT_CARE, _DONT_CARE)
> +
> +/* Page read via slave interface (FIFO_DATA register) */
> +
> +/* Standard 5-cycle read command, with 0x30 end-of-cycle marker */
> +/* Uses SEQ_10: CMD0 + 5 address cycles + CMD2, read data */
> +#define COMMAND_READ_PAGE_STD \
> +	MAKE_COMMAND(_SEQ_10, INPUT_SEL_SIU, DATA_SEL_FIFO, \
> +		NAND_PAGE_READ, _DONT_CARE, NAND_PAGE_READ_END)
> +
> +/* 4-cycle read command, together with GEN_SEQ_CTRL_READ_PAGE_4CYCLE */
> +/* Uses SEQ_18 (generic command sequence, see GEN_SEQ_ECTRL_READ_PAGE_4CYCLE)):
> + * CMD0 + 2+2 address cycles + CMD2, read data
> + */
> +#define COMMAND_READ_PAGE_GEN \
> +	MAKE_COMMAND(_SEQ_18, INPUT_SEL_SIU, DATA_SEL_FIFO, \
> +		NAND_PAGE_READ, _DONT_CARE, NAND_PAGE_READ_END)
> +
> +/* Page read via master interface (DMA) */
> +
> +/* Standard 5-cycle read command, with 0x30 end-of-cycle marker */
> +/* Uses SEQ_10: CMD0 + 5 address cycles + CMD2, read data */
> +#define COMMAND_READ_PAGE_DMA_STD \
> +	MAKE_COMMAND(_SEQ_10, INPUT_SEL_DMA, DATA_SEL_FIFO, \
> +		NAND_PAGE_READ, _DONT_CARE, NAND_PAGE_READ_END)
> +
> +/* 4-cycle read command, together with GEN_SEQ_CTRL_READ_PAGE_4CYCLE */
> +/* Uses SEQ_18 (generic command sequence, see GEN_SEQ_ECTRL_READ_PAGE_4CYCLE)):
> + * CMD0 + 2+2 address cycles + CMD2, read data
> + */
> +#define COMMAND_READ_PAGE_DMA_GEN \
> +	MAKE_COMMAND(_SEQ_18, INPUT_SEL_DMA, DATA_SEL_FIFO, \
> +		NAND_PAGE_READ, _DONT_CARE, NAND_PAGE_READ_END)
> +
> +/* Page write via master interface (DMA) */
> +
> +/* Uses SEQ_12: CMD0 + 5 address cycles + write data + CMD1 */
> +#define COMMAND_WRITE_PAGE_DMA_STD \
> +	MAKE_COMMAND(_SEQ_12, INPUT_SEL_DMA, DATA_SEL_FIFO, \
> +		NAND_PAGE_WRITE, NAND_PAGE_WRITE_END, _DONT_CARE)
> +
> +/* Uses SEQ_19: CMD0 + 4 address cycles + write data + CMD1 */
> +#define COMMAND_WRITE_PAGE_DMA_GEN \
> +	MAKE_COMMAND(_SEQ_19, INPUT_SEL_DMA, DATA_SEL_FIFO, \
> +		NAND_PAGE_WRITE, NAND_PAGE_WRITE_END, _DONT_CARE)
> +
> +/* Block erase */
> +
> +/* Uses SEQ_14: CMD0 + 3 address cycles + CMD1 */
> +#define COMMAND_BLOCK_ERASE \
> +	MAKE_COMMAND(_SEQ_14, INPUT_SEL_SIU, DATA_SEL_FIFO, \
> +		NAND_BLOCK_ERASE, NAND_BLOCK_ERASE_END, _DONT_CARE)
> +
> +/* Assembled values for putting into GEN_SEQ_CTRL register */
> +
> +/* General command sequence specification for 4 cycle PAGE_READ command */
> +#define GEN_SEQ_CTRL_READ_PAGE_4CYCLE \
> +	MAKE_GEN_CMD(1, 0, 1, 0,	/* enable command 0 and 2 phases */ \
> +		     2, 2,		/* col A0 2 cycles, row A0 2 cycles */ \
> +		     0, 0,		/* col A1, row A1 not used */ \
> +		     1,			/* data phase enabled */ \
> +		     _BUSY_0,		/* busy0 phase enabled */ \
> +		     0,			/* immediate cmd execution disabled */ \
> +		     _DONT_CARE)	/* command 3 code not needed */
> +
> +/* General command sequence specification for 4 cycle PAGE_PROGRAM command */
> +#define GEN_SEQ_CTRL_WRITE_PAGE_4CYCLE \
> +	MAKE_GEN_CMD(1, 1, 0, 0,	/* enable command 0 and 1 phases */ \
> +		     2, 2,		/* col A0 2 cycles, row A0 2 cycles */ \
> +		     0, 0,		/* col A1, row A1 not used */ \
> +		     1,			/* data phase enabled */ \
> +		     _BUSY_1,		/* busy1 phase enabled */ \
> +		     0,			/* immediate cmd execution disabled */ \
> +		     _DONT_CARE)	/* command 3 code not needed */

I think I already commented on that last time a driver for the same IP
was submitted by Oleksij.

I'm pretty sure you can implement ->cmd_ctrl() and rely on the default
cmdfunc() logic instead of manually converting those high-level NAND
commands into your own model (which seems to match pretty much the
->cmd_ctrl() model, where you can get the number of address and command
cycles).

Maybe I'm wrong, but I think it's worth a try, and if it works, it
would greatly simplify the driver.

> +
> +/* BCH ECC size calculations. */
> +/* From "Mr. NAND's Wild Ride: Warning: Suprises Ahead", by Robert Pierce,
> + * Denali Software Inc. 2009, table on page 5
> + */
> +/* Use 8 bit correction as base. */
> +#define ECC8_BYTES(BLKSIZE) (ffs(BLKSIZE) + 3)
> +/* The following would be valid for 4..24 bits of correction. */
> +#define ECC_BYTES_PACKED(CAP, BLKSIZE) ((ECC8_BYTES(BLKSIZE) * (CAP) + 7) / 8)
> +/* Our hardware however requires more bytes than strictly necessary due to
> + * the internal design.
> + */
> +#define ECC_BYTES(CAP, BLKSIZE) ((ECC8_BYTES(1024) * (CAP) + 7) / 8)
> +
> +/* Read modes */
> +enum nfc_read_mode {
> +	NFC_READ_STD, /* Standard page read with ECC */
> +	NFC_READ_RAW, /* Raw mode read of main area without ECC */
> +	NFC_READ_OOB, /* Read oob only (no ECC) */
> +	NFC_READ_ALL  /* Read main+oob in raw mode (no ECC) */
> +};
> +
> +/* Timing parameters, from DT */
> +struct nfc_timings {
> +	uint32_t time_seq_0;
> +	uint32_t time_seq_1;
> +	uint32_t timings_asyn;
> +	uint32_t time_gen_seq_0;
> +	uint32_t time_gen_seq_1;
> +	uint32_t time_gen_seq_2;
> +	uint32_t time_gen_seq_3;
> +};
> +
> +/* Configuration, from DT */
> +struct nfc_setup {
> +	struct nfc_timings timings;
> +	bool use_bank_select; /* CE selects 'bank' rather than 'chip' */
> +	bool rb_wired_and;    /* Ready/busy wired AND rather than per-chip */
> +};
> +
> +/* DMA buffer, from both software (buf) and hardware (phys) perspective. */
> +struct nfc_dma {
> +	void *buf; /* mapped address */
> +	dma_addr_t phys; /* physical address */
> +	int bytes_left; /* how much data left to read from buffer? */
> +	int buf_bytes; /* how much allocated data in the buffer? */
> +	uint8_t *ptr; /* work pointer */
> +};
> +
> +#ifndef POLLED_XFERS
> +/* Interrupt management */
> +struct nfc_irq {
> +	int done; /* interrupt triggered, consequently we're done. */
> +	uint32_t int_status; /* INT_STATUS at time of interrupt */
> +	wait_queue_head_t wq; /* For waiting on controller interrupt */
> +};
> +#endif
> +
> +/* Information common to all chips, including the NANDFLASH-CTRL IP */
> +struct nfc_info {

Should inherit from nand_hw_ctrl (see the sunxi_nand driver)...

> +	void __iomem *regbase;
> +	struct device *dev;
> +	struct nand_hw_control *controller;

... and not point to it.

> +	spinlock_t lock;
> +	struct nfc_setup *setup;
> +	struct nfc_dma dma;
> +#ifndef POLLED_XFERS
> +	struct nfc_irq irq;
> +#endif
> +};
> +
> +/* Per-chip controller configuration */
> +struct nfc_config {
> +	uint32_t mem_ctrl;
> +	uint32_t control;
> +	uint32_t ecc_ctrl;
> +	uint32_t mem_status_mask;
> +	uint32_t cs;
> +};
> +
> +/* Cache for info that we need to save across calls to nfc_command */
> +struct nfc_cmd_cache {
> +	unsigned int command;
> +	int page;
> +	int column;
> +	int write_size;
> +	int oob_required;
> +	int write_raw;
> +};
> +
> +/* Information for each physical NAND chip. */
> +struct chip_info {
> +	struct nand_chip chip;
> +	struct nfc_cmd_cache cmd_cache;
> +	struct nfc_config nfc_config;
> +};
> +
> +/* What we tell mtd is an mtd_info actually is a complete chip_info */
> +#define TO_CHIP_INFO(mtd) ((struct chip_info *)(mtd_to_nand(mtd)))

Ouch! Please, don't do that, container_of() is here for a good reason.
And prefer static inline functions over macros for this kind of things.

static inline struct chip_info *mtd_to_chip_info(struct mtd_info *mtd)
{
	return container_of(mtd_to_nand(mtd), struct chip_info, chip);
}

> +
> +/* This is a global pointer, as we only support one single instance of the NFC.
> + * For multiple instances, we would need to add nfc_info as a parameter to
> + * several functions, as well as adding it as a member of the chip_info struct.
> + * Since most likely a system would only have one NFC instance, we don't
> + * go all the way implementing that feature now.
> + */
> +static struct nfc_info *nfc_info;

Come on! Don't be so lazy, do the right thing.

> +
> +/* The timing setup is expected to come via DT. We keep some default timings
> + * here for reference, based on a 100 MHz reference clock.
> + */
> +
> +static const struct nfc_timings default_mode0_pll_enabled = {
> +	0x0d151533, 0x000b0515, 0x00000046,
> +	0x00150000, 0x00000000, 0x00000005, 0x00000015 };

Can you explain those magic values?

> +
> +/**** Utility routines. */

Please use regular comments: /* */

> +
> +/* Count the number of 0's in buff up to a max of max_bits */
> +/* Used to determine how many bitflips there are in an allegedly erased block */
> +static int count_zero_bits(uint8_t *buff, int size, int max_bits)
> +{
> +	int k, zero_bits = 0;
> +
> +	for (k = 0; k < size; k++) {
> +		zero_bits += hweight8(~buff[k]);
> +		if (zero_bits > max_bits)
> +			break;
> +	}
> +
> +	return zero_bits;
> +}

We have an helper for that [1].

> +
> +/**** Low level stuff. Read and write registers, interrupt routine, etc. */

Ditto.

> +
> +/* Read and write NFC SFR registers */
> +
> +static uint32_t nfc_read(uint reg_offset)
> +{
> +	return readl_relaxed(nfc_info->regbase + reg_offset);
> +}
> +
> +static void nfc_write(uint32_t data, uint reg_offset)
> +{
> +	/* Note: According to NANDFLASH-CTRL Design Specification, rev 1.14,
> +	 * p19, the NFC SFR's can only be written when STATUS.CTRL_STAT is 0.
> +	 * However, this doesn't seem to be an issue in practice.
> +	 */
> +	writel_relaxed(data, nfc_info->regbase  + reg_offset);
> +}

Do you really want to use the _relaxed functions?

> +
> +#ifndef POLLED_XFERS
> +static irqreturn_t nfc_irq(int irq, void *device_info)
> +{
> +	/* Note that device_info = nfc_info, so if we don't want a global
> +	 * nfc_info we can get it via device_info.
> +	 */
> +
> +	/* Save interrupt status in case caller wants to check what actually
> +	 * happened.
> +	 */
> +	nfc_info->irq.int_status = nfc_read(INT_STATUS_REG);
> +
> +	MTD_TRACE("Got interrupt %d, INT_STATUS 0x%08x\n",
> +		  irq, nfc_info->irq.int_status);
> +
> +	/* Note: We can't clear the interrupts by clearing CONTROL.INT_EN,
> +	 * as that does not disable the interrupt output port from the
> +	 * nfc towards the gic.
> +	 */
> +	nfc_write(0, INT_STATUS_REG);
> +
> +	nfc_info->irq.done = 1;
> +	wake_up(&nfc_info->irq.wq);
> +
> +	return IRQ_HANDLED;
> +}
> +#endif

Remove the #ifndef section, since it's always activated (see my first
comment).

> +
> +/* Get resources from platform: register bank mapping, irqs, etc */
> +static int nfc_init_resources(struct platform_device *pdev)
> +{
> +	struct device *dev = &pdev->dev;
> +	struct resource *resource;
> +#ifndef POLLED_XFERS
> +	int irq;
> +	int res;
> +#endif

Ditto.

> +
> +	/* Register base for controller, ultimately from device tree */
> +	resource = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> +	if (!resource) {
> +		dev_err(dev, "No register addresses configured!\n");
> +		return -ENOMEM;
> +	}
> +	nfc_info->regbase = devm_ioremap_resource(dev, resource);
> +	if (IS_ERR(nfc_info->regbase))
> +		return PTR_ERR(nfc_info->regbase);
> +
> +	dev_dbg(dev, "Got SFRs at phys %p..%p, mapped to %p\n",
> +		 (void *)resource->start, (void *)resource->end,
> +		 nfc_info->regbase);
> +
> +	/* A DMA buffer */
> +	nfc_info->dma.buf =
> +		dma_alloc_coherent(dev, DMA_BUF_SIZE,
> +				   &nfc_info->dma.phys, GFP_KERNEL);
> +	if (nfc_info->dma.buf == NULL) {
> +		dev_err(dev, "dma_alloc_coherent failed!\n");
> +		return -ENOMEM;
> +	}
> +
> +	dev_dbg(dev, "DMA buffer %p at physical %p\n",
> +		 nfc_info->dma.buf, (void *)nfc_info->dma.phys);
> +
> +#ifndef POLLED_XFERS
> +	irq = platform_get_irq(pdev, 0);
> +	if (irq < 0) {
> +		dev_err(dev, "No irq configured\n");
> +		return irq;
> +	}
> +	res = request_irq(irq, nfc_irq, 0, "evatronix-nand", nfc_info);

devm_?

> +	if (res < 0) {
> +		dev_err(dev, "request_irq failed\n");
> +		return res;
> +	}
> +	dev_dbg(dev, "Successfully registered IRQ %d\n", irq);
> +#endif
> +
> +	return 0;
> +}

I'm stopping there for now.

Can you please remove everything that is not strictly required and
clean the things I pointed before submitting a new version.

To be honest, your driver seems really complicated compared to what
it's supposed to do, and I suspect it could be a lot simpler, but
again, maybe I'm wrong.

If you didn't try yet, please investigate the ->cmd_ctrl() approach,
and if you did, could you explain why it didn't work out?

Regards,

Boris

[1]http://lxr.free-electrons.com/source/drivers/mtd/nand/nand_base.c#L1183

-- 
Boris Brezillon, Free Electrons
Embedded Linux and Kernel engineering
http://free-electrons.com

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 3/4] mtd: nand: Add support for Evatronix NANDFLASH-CTRL
  2016-06-03 14:59 ` Boris Brezillon
@ 2016-06-09  8:19   ` Ricard Wanderlof
  2016-06-09  9:08     ` Boris Brezillon
  2016-06-09 17:24     ` Mychaela Falconia
  0 siblings, 2 replies; 18+ messages in thread
From: Ricard Wanderlof @ 2016-06-09  8:19 UTC (permalink / raw)
  To: Boris Brezillon
  Cc: Brian Norris, David Woodhouse, Benoit Cousson, Tony Lindgren,
	devicetree, Linux mtd, linux-kernel, Oleksij Rempel


Hi Boris,

Again, thanks for reviewing this.

On Fri, 3 Jun 2016, Boris Brezillon wrote:

> >  drivers/mtd/nand/Kconfig          |    6 +
> >  drivers/mtd/nand/Makefile         |    1 +
> >  drivers/mtd/nand/evatronix_nand.c | 1909 +++++++++++++++++++++++++++++++++++++
> >  3 files changed, 1916 insertions(+)
> >  create mode 100644 drivers/mtd/nand/evatronix_nand.c
> 
> Please run checkpatch.pl and fix all the ERRORS and WARNINGS.

I did run checkpatch.pl (at least the one in the mtd l2 tree), and there 
should be no outstanding errors. Some of the warnings are related to lines 
that are more than 80 characters which contain printouts which I don't 
want to split as it makes them hard to grep for.

There is a warning regarding the KConfig entry though, which seems to 
indicate that the description is missing - perhaps it just means that the 
help text is too short (although it's not shorter than many other NAND 
drivers in the same file)?

There are a couple of BUG()s though which are all of the type 'things that 
shouldn't happen' (e.g.. an enum having a value outside its range), so 
there's no real way to recover, however, one could always return early 
from the function in question and hope for the best.

I see now that there's a comment on an overly long line in the commit 
message that I've missed, as is a function call that's one character over 
the 80 character limit.

I usually consider checkpatch.pl to be of guidence rather than a sentinel 
when it comes to warnings, but if you want a '0 warnings' approach I can 
certainly accomplish that.

> > +#include <asm/dma.h>
> > +#include <linux/bitops.h> /* for ffs() */
> > +#include <linux/io.h>
> > +#include <linux/dma-mapping.h>
> > +#include <linux/err.h>
> > +#include <linux/interrupt.h>
> > +#include <linux/module.h>
> > +#include <linux/platform_device.h>
> > +#include <linux/of.h>
> > +#include <linux/slab.h>
> > +#include <linux/mtd/mtd.h>
> > +#include <linux/mtd/nand.h>
> > +#include <linux/mtd/concat.h>
> > +#include <linux/mtd/partitions.h>
> > +#include <linux/version.h>
> 
> You seem to include a lot of things, and even asm headers. Please make
> sure you really need them.

Ok, will do.

> > +/* Some of this could potentially be moved to DT, but it represents stuff
> > + * that is either untested, only used for debugging, or things we really
> > + * don't want anyone to change, so we keep it here until a clear use case
> > + * emerges.
> > + */
> > +
> > +#undef NFC_DMA64BIT /* NFC hardware support for 64-bit DMA transfers */
> > +
> > +#undef POLLED_XFERS /* Use polled rather than interrupt based transfers */
> > +
> > +#undef CLEAR_DMA_BUF_AFTER_WRITE /* Useful for debugging */
> 
> Then simply drop the code in those sections and add it back when it's
> been tested.

NFC_DMA64BIT is untested so I'll take it out.

CLEAR_DMA_BUF_AFTER_WRITE is useful when debugging (and tested), and 
POLLED_XFERS is also tested but normally only used for debugging (if for 
instance there's a problem with interrupts). I don't really like to throw 
out code that's useful because it's unnecessary work to have to put it in 
again when the need arises. I could change the wording in the comment to 
make it clearer though (especially after having removed NFC_DMA64BIT) if 
that is enough?

The rationale for leaving this code in is that it can help bring up a new 
unknown system with this IP, because I consider the most likely re-use of 
this driver to be for someone who is developing a new SoC, as it seems 
rather uncommon in commercially available chips.

> > +/* DMA buffer for page transfers. */
> > +#define DMA_BUF_SIZE (8192 + 640) /* main + spare for 8k page flash */
> 
> This should clearly be dynamic.

8k pages are the largest the controller can handle, and there's only a 
single DMA buffer for the controller, so it's a very small amount of 
memory, and I didn't feel it worth the complexity to reduce the size just 
because a smaller paged flash was encountered. The driver uses the DMA 
buffer to read the ID data so it needs a buffer anyway before the page 
size has been determined. But if you feel it's important I can rework it - 
there would have to be two buffers, one smaller one for reading the ID and 
a larger one subsequently allocated for page data.

> > +
> > +/* Debugging */
> > +
> > +#define MTD_TRACE(FORMAT, ...) pr_debug("%s: " FORMAT, __func__, ## __VA_ARGS__)
> 
> Hm, I'm not a big fan of those custom pr_debug() macros, but if you
> really wan to keep it you shouldn't prefix it with MTD_.

Ok. I was thinking about replacing it with pr_debug straight off, but saw 
that there were other drivers with custom debug macros so I left it in. 
I'll replace it with pr_debug then.

> Reading at the code I see a lot of MTD_TRACE() calls, while I'm not
> against debug traces, it seems to me that you've kept traces you used
> while developing/debugging your implementation. Can you clean it up and
> keep only the relevant ones.

It's true that the debug traces were initially created during driver 
development, however I have gone through the debug printouts and consider 
the ones remaining to be relevant, especially if one is trying to debug a 
new previously untested system with this IP. Was there any particular 
one(s) you were thinking of?

> > +	MAKE_COMMAND(_SEQ_14, INPUT_SEL_SIU, DATA_SEL_FIFO, \
> > +		NAND_BLOCK_ERASE, NAND_BLOCK_ERASE_END, _DONT_CARE)
> > +
> > +/* Assembled values for putting into GEN_SEQ_CTRL register */
> > +
> > +/* General command sequence specification for 4 cycle PAGE_READ command */
> > +#define GEN_SEQ_CTRL_READ_PAGE_4CYCLE \
> > +	MAKE_GEN_CMD(1, 0, 1, 0,	/* enable command 0 and 2 phases */ \
> > +		     2, 2,		/* col A0 2 cycles, row A0 2 cycles */ \
> > +		     0, 0,		/* col A1, row A1 not used */ \
> > +		     1,			/* data phase enabled */ \
> > +		     _BUSY_0,		/* busy0 phase enabled */ \
> > +		     0,			/* immediate cmd execution disabled */ \
> > +		     _DONT_CARE)	/* command 3 code not needed */
> > +
> > +/* General command sequence specification for 4 cycle PAGE_PROGRAM command */
> > +#define GEN_SEQ_CTRL_WRITE_PAGE_4CYCLE \
> > +	MAKE_GEN_CMD(1, 1, 0, 0,	/* enable command 0 and 1 phases */ \
> > +		     2, 2,		/* col A0 2 cycles, row A0 2 cycles */ \
> > +		     0, 0,		/* col A1, row A1 not used */ \
> > +		     1,			/* data phase enabled */ \
> > +		     _BUSY_1,		/* busy1 phase enabled */ \
> > +		     0,			/* immediate cmd execution disabled */ \
> > +		     _DONT_CARE)	/* command 3 code not needed */
> 
> I think I already commented on that last time a driver for the same IP 
> was submitted by Oleksij.

(Just to clarify: this doesn't have any bearing on this particular issue, 
but the SoC Oleksij submitted a driver for used a rather different 
(probably older) version of the same IP. The only documentation I saw for 
that SoC was the IP register map, and there were several differences in 
not only the register addresses but also the available registers and bit 
fields, and it did not look like one was a subset of the other. So it 
looked more like the IP vendor had done an at least partial rewrite of the 
internal logic between the two versions.)

> I'm pretty sure you can implement ->cmd_ctrl() and rely on the default
> cmdfunc() logic instead of manually converting those high-level NAND
> commands into your own model (which seems to match pretty much the
> ->cmd_ctrl() model, where you can get the number of address and command
> cycles).
> 
> Maybe I'm wrong, but I think it's worth a try, and if it works, it
> would greatly simplify the driver.

It's not so much trying as reading the manual for the IP. :-)

The problem here is that the ->cmd_ctrl() logic assumes that you can pass 
single bytes transparently via the IP and also directly control the state 
of the ALE and CLE lines, as well as set up data transfers independently 
of that, none of which is not possible with this IP. Instead, the IP 
offers a number of fixed interface sequences, some more programmable than 
others, a subset of which are used in this driver, which do all the heavy 
lifting. There is for instance no sequence which just writes or reads data 
to or from the flash, there's always at least a command write (CLE) 
included.

(The command definitions in the macro sections are more or less direct 
translations from a table and section in the IP manual describing how to 
accomplish various functions.)

If there had been a transparent mode I would certainly have gone that 
route, at least initially, rather than mess about with all this controller 
specific stuff. I think that's why Oleksij arrived at a similar solution 
(or possibly he used a non-Linux driver as a starting point).

The designers of this IP apparantly did not have Linux in mind when they 
designed the controller, since it does all the low level stuff 
autonomously (in the right IP configuration it can even remap flash blocks 
transparently), with no way of intervening. For instance, when doing 
hardware ECC, the OOB data is not available anywhere to the user, and if 
one wants to actually read it a separate OOB write needs to be done. I 
think the target market for the IP is really a general real time OS where 
there is no NAND driver available, and you just want to fire off a single 
high level command, wait for an interrupt, and have your data waiting for 
you.

This latter property is actually advantageous in Linux too as the driver 
doesn't have to do bit- and byte-banging against the NAND flash. I'm not 
sure what the gain in overhead is in practice, but at any rate there's not 
much of a choice.

> > +/* Information common to all chips, including the NANDFLASH-CTRL IP */
> > +struct nfc_info {
> 
> Should inherit from nand_hw_ctrl (see the sunxi_nand driver)...
> 
> > +	void __iomem *regbase;
> > +	struct device *dev;
> > +	struct nand_hw_control *controller;
> 
> ... and not point to it.

Ok. I'm a bit unsure what you mean by 'inherit' though; in the sunxi 
driver the struct nand_hw_controller is contained within the struct 
sunxi_nfc, in my case there's a pointer to a single instance of a 
nand_hw_control. I agree that my approach is wasteful on a dynamic 
allocation and I have no problems changing it, but conceptually there's 
not much of a difference.

> > +
> > +/* What we tell mtd is an mtd_info actually is a complete chip_info */
> > +#define TO_CHIP_INFO(mtd) ((struct chip_info *)(mtd_to_nand(mtd)))
> 
> Ouch! Please, don't do that, container_of() is here for a good reason.
> And prefer static inline functions over macros for this kind of things.
> 
> static inline struct chip_info *mtd_to_chip_info(struct mtd_info *mtd)
> {
> 	return container_of(mtd_to_nand(mtd), struct chip_info, chip);
> }

Ok. Will change.

> > +
> > +/* This is a global pointer, as we only support one single instance of the NFC.
> > + * For multiple instances, we would need to add nfc_info as a parameter to
> > + * several functions, as well as adding it as a member of the chip_info struct.
> > + * Since most likely a system would only have one NFC instance, we don't
> > + * go all the way implementing that feature now.
> > + */
> > +static struct nfc_info *nfc_info;
> 
> Come on! Don't be so lazy, do the right thing.

It's not a question of laziness, it was a conscious decision: why add 
unnecessary bloat and complexity for a case that probably will never 
occur? I can certainly change it if you think it's worth while of course.

> > +
> > +/* The timing setup is expected to come via DT. We keep some default timings
> > + * here for reference, based on a 100 MHz reference clock.
> > + */
> > +
> > +static const struct nfc_timings default_mode0_pll_enabled = {
> > +	0x0d151533, 0x000b0515, 0x00000046,
> > +	0x00150000, 0x00000000, 0x00000005, 0x00000015 };
> 
> Can you explain those magic values?

Not really, the problem is that the agreement we have with the IP vendor 
is that we may not disclose any documentation, outside of what is 
absolutely necessary to write working code.

A rationale is that anyone else wanting to use the driver will either be 
designing their own SoC in which case they will have access to the 
relevant documentation, or if they're using a SoC from someone else, the 
SoC vendor will have to provide that information in order for the chip to 
be useful anyway.

> > +
> > +/**** Utility routines. */
> 
> Please use regular comments: /* */

Ugh. Yes, sorry.

> > +
> > +/* Count the number of 0's in buff up to a max of max_bits */
> > +/* Used to determine how many bitflips there are in an allegedly erased block */
> > +static int count_zero_bits(uint8_t *buff, int size, int max_bits)
> > +{
> > +	int k, zero_bits = 0;
> > +
> > +	for (k = 0; k < size; k++) {
> > +		zero_bits += hweight8(~buff[k]);
> > +		if (zero_bits > max_bits)
> > +			break;
> > +	}
> > +
> > +	return zero_bits;
> > +}
> 
> We have an helper for that [1].

Great, I'll use that. (I don't think it existed when the first version of 
this driver was written).

> > +
> > +/* Read and write NFC SFR registers */
> > +
> > +static uint32_t nfc_read(uint reg_offset)
> > +{
> > +	return readl_relaxed(nfc_info->regbase + reg_offset);
> > +}
> > +
> > +static void nfc_write(uint32_t data, uint reg_offset)
> > +{
> > +	/* Note: According to NANDFLASH-CTRL Design Specification, rev 1.14,
> > +	 * p19, the NFC SFR's can only be written when STATUS.CTRL_STAT is 0.
> > +	 * However, this doesn't seem to be an issue in practice.
> > +	 */
> > +	writel_relaxed(data, nfc_info->regbase  + reg_offset);
> > +}
> 
> Do you really want to use the _relaxed functions?

Yes, using _relaxed, together with explicit memory barriers in the DMA 
routines has shown a 5% increase in flash read performance on an ARM 
system, the reason being that on an ARM system the implicit memory barrier 
in the writel() call causes a fairly heavy penalty in terms of flushing 
the L2 cache.

> > +	res = request_irq(irq, nfc_irq, 0, "evatronix-nand", nfc_info);
> 
> devm_?

Yes, good catch, thanks!

> To be honest, your driver seems really complicated compared to what
> it's supposed to do, and I suspect it could be a lot simpler, but
> again, maybe I'm wrong.
> 
> If you didn't try yet, please investigate the ->cmd_ctrl() approach,
> and if you did, could you explain why it didn't work out?

Given that the controller does not have the transparency that the 
->cmd_ctrl() approach requires, as noted above, I can't see how it could 
be simplified.

I basically need to grab everything needed for a given operation and 
interpret it before handing it over to the controller. I considered using 
a higher level API, by replacing the default ->cmdfunc() (default 
nand_command/nand_command_lp) with a specific version, which would have 
avoided the need to interpret the NAND commands arriving via ->cmd_ctrl(), 
but that meant duplicating some of the logic in nand_base.c which seemed 
like a bad idea.

> Can you please remove everything that is not strictly required and
> clean the things I pointed before submitting a new version.

Sure, but I would very much like some feedback on the points I've raised 
above before going on with that.

/Ricard
-- 
Ricard Wolf Wanderlöf                           ricardw(at)axis.com
Axis Communications AB, Lund, Sweden            www.axis.com
Phone +46 46 272 2016                           Fax +46 46 13 61 30

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 3/4] mtd: nand: Add support for Evatronix NANDFLASH-CTRL
  2016-06-09  8:19   ` Ricard Wanderlof
@ 2016-06-09  9:08     ` Boris Brezillon
  2016-06-10 14:40       ` Ricard Wanderlof
  2016-06-09 17:24     ` Mychaela Falconia
  1 sibling, 1 reply; 18+ messages in thread
From: Boris Brezillon @ 2016-06-09  9:08 UTC (permalink / raw)
  To: Ricard Wanderlof
  Cc: devicetree, Tony Lindgren, linux-kernel, Oleksij Rempel,
	Linux mtd, Benoit Cousson, Brian Norris, David Woodhouse

On Thu, 9 Jun 2016 10:19:51 +0200
Ricard Wanderlof <ricard.wanderlof@axis.com> wrote:

> Hi Boris,
> 
> Again, thanks for reviewing this.
> 
> On Fri, 3 Jun 2016, Boris Brezillon wrote:
> 
> > >  drivers/mtd/nand/Kconfig          |    6 +
> > >  drivers/mtd/nand/Makefile         |    1 +
> > >  drivers/mtd/nand/evatronix_nand.c | 1909 +++++++++++++++++++++++++++++++++++++
> > >  3 files changed, 1916 insertions(+)
> > >  create mode 100644 drivers/mtd/nand/evatronix_nand.c  
> > 
> > Please run checkpatch.pl and fix all the ERRORS and WARNINGS.  
> 
> I did run checkpatch.pl (at least the one in the mtd l2 tree), and there 
> should be no outstanding errors. Some of the warnings are related to lines 
> that are more than 80 characters which contain printouts which I don't 
> want to split as it makes them hard to grep for.
> 
> There is a warning regarding the KConfig entry though, which seems to 
> indicate that the description is missing - perhaps it just means that the 
> help text is too short (although it's not shorter than many other NAND 
> drivers in the same file)?
> 
> There are a couple of BUG()s though which are all of the type 'things that 
> shouldn't happen' (e.g.. an enum having a value outside its range), so 
> there's no real way to recover, however, one could always return early 
> from the function in question and hope for the best.
> 
> I see now that there's a comment on an overly long line in the commit 
> message that I've missed, as is a function call that's one character over 
> the 80 character limit.
> 
> I usually consider checkpatch.pl to be of guidence rather than a sentinel 
> when it comes to warnings, but if you want a '0 warnings' approach I can 
> certainly accomplish that.

Well, I'm not asking to fix all 80 chars warnings, but I see a lot of
warnings and errors in there [1], and I'm pretty sure most of them can
be addressed in a sane way.

> 
> > > +#include <asm/dma.h>
> > > +#include <linux/bitops.h> /* for ffs() */
> > > +#include <linux/io.h>
> > > +#include <linux/dma-mapping.h>
> > > +#include <linux/err.h>
> > > +#include <linux/interrupt.h>
> > > +#include <linux/module.h>
> > > +#include <linux/platform_device.h>
> > > +#include <linux/of.h>
> > > +#include <linux/slab.h>
> > > +#include <linux/mtd/mtd.h>
> > > +#include <linux/mtd/nand.h>
> > > +#include <linux/mtd/concat.h>
> > > +#include <linux/mtd/partitions.h>
> > > +#include <linux/version.h>  
> > 
> > You seem to include a lot of things, and even asm headers. Please make
> > sure you really need them.  
> 
> Ok, will do.
> 
> > > +/* Some of this could potentially be moved to DT, but it represents stuff
> > > + * that is either untested, only used for debugging, or things we really
> > > + * don't want anyone to change, so we keep it here until a clear use case
> > > + * emerges.
> > > + */
> > > +
> > > +#undef NFC_DMA64BIT /* NFC hardware support for 64-bit DMA transfers */
> > > +
> > > +#undef POLLED_XFERS /* Use polled rather than interrupt based transfers */
> > > +
> > > +#undef CLEAR_DMA_BUF_AFTER_WRITE /* Useful for debugging */  
> > 
> > Then simply drop the code in those sections and add it back when it's
> > been tested.  
> 
> NFC_DMA64BIT is untested so I'll take it out.
> 
> CLEAR_DMA_BUF_AFTER_WRITE is useful when debugging (and tested), and 
> POLLED_XFERS is also tested but normally only used for debugging (if for 
> instance there's a problem with interrupts). I don't really like to throw 
> out code that's useful because it's unnecessary work to have to put it in 
> again when the need arises. I could change the wording in the comment to 
> make it clearer though (especially after having removed NFC_DMA64BIT) if 
> that is enough?
> 
> The rationale for leaving this code in is that it can help bring up a new 
> unknown system with this IP, because I consider the most likely re-use of 
> this driver to be for someone who is developing a new SoC, as it seems 
> rather uncommon in commercially available chips.

That's not how it works. Either you provide a sane way to activate
these options (using Kconfig entries), or your drop dead-code sections.

I understand that debugging is important to you, but adding hundreds
lines of unused code is also hurting readability, so I keep thinking
that these options should either be removed (with the associated code
sections) or exposed as Kconfig options.

> 
> > > +/* DMA buffer for page transfers. */
> > > +#define DMA_BUF_SIZE (8192 + 640) /* main + spare for 8k page flash */  
> > 
> > This should clearly be dynamic.  
> 
> 8k pages are the largest the controller can handle, and there's only a 
> single DMA buffer for the controller, so it's a very small amount of 
> memory, and I didn't feel it worth the complexity to reduce the size just 
> because a smaller paged flash was encountered. The driver uses the DMA 
> buffer to read the ID data so it needs a buffer anyway before the page 
> size has been determined. But if you feel it's important I can rework it - 
> there would have to be two buffers, one smaller one for reading the ID and 
> a larger one subsequently allocated for page data.

If you reject all NAND above 8k + 640 oob bytes I'm fine with this
fixed size.

> 
> > > +
> > > +/* Debugging */
> > > +
> > > +#define MTD_TRACE(FORMAT, ...) pr_debug("%s: " FORMAT, __func__, ## __VA_ARGS__)  
> > 
> > Hm, I'm not a big fan of those custom pr_debug() macros, but if you
> > really wan to keep it you shouldn't prefix it with MTD_.  
> 
> Ok. I was thinking about replacing it with pr_debug straight off, but saw 
> that there were other drivers with custom debug macros so I left it in. 
> I'll replace it with pr_debug then.
> 
> > Reading at the code I see a lot of MTD_TRACE() calls, while I'm not
> > against debug traces, it seems to me that you've kept traces you used
> > while developing/debugging your implementation. Can you clean it up and
> > keep only the relevant ones.  
> 
> It's true that the debug traces were initially created during driver 
> development, however I have gone through the debug printouts and consider 
> the ones remaining to be relevant, especially if one is trying to debug a 
> new previously untested system with this IP. Was there any particular 
> one(s) you were thinking of?

I don't have any example, there just seem to be a lot of them. I'll
have a closer look.

> 
> > > +	MAKE_COMMAND(_SEQ_14, INPUT_SEL_SIU, DATA_SEL_FIFO, \
> > > +		NAND_BLOCK_ERASE, NAND_BLOCK_ERASE_END, _DONT_CARE)
> > > +
> > > +/* Assembled values for putting into GEN_SEQ_CTRL register */
> > > +
> > > +/* General command sequence specification for 4 cycle PAGE_READ command */
> > > +#define GEN_SEQ_CTRL_READ_PAGE_4CYCLE \
> > > +	MAKE_GEN_CMD(1, 0, 1, 0,	/* enable command 0 and 2 phases */ \
> > > +		     2, 2,		/* col A0 2 cycles, row A0 2 cycles */ \
> > > +		     0, 0,		/* col A1, row A1 not used */ \
> > > +		     1,			/* data phase enabled */ \
> > > +		     _BUSY_0,		/* busy0 phase enabled */ \
> > > +		     0,			/* immediate cmd execution disabled */ \
> > > +		     _DONT_CARE)	/* command 3 code not needed */
> > > +
> > > +/* General command sequence specification for 4 cycle PAGE_PROGRAM command */
> > > +#define GEN_SEQ_CTRL_WRITE_PAGE_4CYCLE \
> > > +	MAKE_GEN_CMD(1, 1, 0, 0,	/* enable command 0 and 1 phases */ \
> > > +		     2, 2,		/* col A0 2 cycles, row A0 2 cycles */ \
> > > +		     0, 0,		/* col A1, row A1 not used */ \
> > > +		     1,			/* data phase enabled */ \
> > > +		     _BUSY_1,		/* busy1 phase enabled */ \
> > > +		     0,			/* immediate cmd execution disabled */ \
> > > +		     _DONT_CARE)	/* command 3 code not needed */  
> > 
> > I think I already commented on that last time a driver for the same IP 
> > was submitted by Oleksij.  
> 
> (Just to clarify: this doesn't have any bearing on this particular issue, 
> but the SoC Oleksij submitted a driver for used a rather different 
> (probably older) version of the same IP. The only documentation I saw for 
> that SoC was the IP register map, and there were several differences in 
> not only the register addresses but also the available registers and bit 
> fields, and it did not look like one was a subset of the other. So it 
> looked more like the IP vendor had done an at least partial rewrite of the 
> internal logic between the two versions.)
> 
> > I'm pretty sure you can implement ->cmd_ctrl() and rely on the default
> > cmdfunc() logic instead of manually converting those high-level NAND
> > commands into your own model (which seems to match pretty much the  
> > ->cmd_ctrl() model, where you can get the number of address and command  
> > cycles).
> > 
> > Maybe I'm wrong, but I think it's worth a try, and if it works, it
> > would greatly simplify the driver.  
> 
> It's not so much trying as reading the manual for the IP. :-)
> 
> The problem here is that the ->cmd_ctrl() logic assumes that you can pass 
> single bytes transparently via the IP and also directly control the state 
> of the ALE and CLE lines, as well as set up data transfers independently 
> of that, none of which is not possible with this IP. Instead, the IP 
> offers a number of fixed interface sequences, some more programmable than 
> others, a subset of which are used in this driver, which do all the heavy 
> lifting. There is for instance no sequence which just writes or reads data 
> to or from the flash, there's always at least a command write (CLE) 
> included.
> 
> (The command definitions in the macro sections are more or less direct 
> translations from a table and section in the IP manual describing how to 
> accomplish various functions.)
> 
> If there had been a transparent mode I would certainly have gone that 
> route, at least initially, rather than mess about with all this controller 
> specific stuff. I think that's why Oleksij arrived at a similar solution 
> (or possibly he used a non-Linux driver as a starting point).
> 
> The designers of this IP apparantly did not have Linux in mind when they 
> designed the controller, since it does all the low level stuff 
> autonomously (in the right IP configuration it can even remap flash blocks 
> transparently), with no way of intervening. For instance, when doing 
> hardware ECC, the OOB data is not available anywhere to the user, and if 
> one wants to actually read it a separate OOB write needs to be done. I 
> think the target market for the IP is really a general real time OS where 
> there is no NAND driver available, and you just want to fire off a single 
> high level command, wait for an interrupt, and have your data waiting for 
> you.
> 
> This latter property is actually advantageous in Linux too as the driver 
> doesn't have to do bit- and byte-banging against the NAND flash. I'm not 
> sure what the gain in overhead is in practice, but at any rate there's not 
> much of a choice.

Just to be clear, you don't have to toggle the pins each time
->cmd_ctrl() is called, you can cache the operations and launch it once
it says it's dones (don't remember the flag).

Now, I agree that it's not perfect, and it would certainly be better to
have the whole thing packed together (including the data transfer
size). I'm working on it ;).

What made me think using ->cmd_ctrl() would be simpler it the fact that
you're extracting address and cmd cycles from the cmd type, and this is
something ->cmd_ctrl() is already providing (see what's done here [2]).

> 
> > > +/* Information common to all chips, including the NANDFLASH-CTRL IP */
> > > +struct nfc_info {  
> > 
> > Should inherit from nand_hw_ctrl (see the sunxi_nand driver)...
> >   
> > > +	void __iomem *regbase;
> > > +	struct device *dev;
> > > +	struct nand_hw_control *controller;  
> > 
> > ... and not point to it.  
> 
> Ok. I'm a bit unsure what you mean by 'inherit' though; in the sunxi 
> driver the struct nand_hw_controller is contained within the struct 
> sunxi_nfc, in my case there's a pointer to a single instance of a 
> nand_hw_control. I agree that my approach is wasteful on a dynamic 
> allocation and I have no problems changing it, but conceptually there's 
> not much of a difference.

There's a huge different. By embedding the nand_hw_control struct into
your nfc_info object you allow things like:

static int get_nfc(struct nand_chip *chip)
{
	return container_of(chip->controller, struct nfc_info,
			    controller);
}

This way you can retrieve the nfc_info object attached to the nand_chip.

> 
> > > +
> > > +/* What we tell mtd is an mtd_info actually is a complete chip_info */
> > > +#define TO_CHIP_INFO(mtd) ((struct chip_info *)(mtd_to_nand(mtd)))  
> > 
> > Ouch! Please, don't do that, container_of() is here for a good reason.
> > And prefer static inline functions over macros for this kind of things.
> > 
> > static inline struct chip_info *mtd_to_chip_info(struct mtd_info *mtd)
> > {
> > 	return container_of(mtd_to_nand(mtd), struct chip_info, chip);
> > }  
> 
> Ok. Will change.
> 
> > > +
> > > +/* This is a global pointer, as we only support one single instance of the NFC.
> > > + * For multiple instances, we would need to add nfc_info as a parameter to
> > > + * several functions, as well as adding it as a member of the chip_info struct.
> > > + * Since most likely a system would only have one NFC instance, we don't
> > > + * go all the way implementing that feature now.
> > > + */
> > > +static struct nfc_info *nfc_info;  
> > 
> > Come on! Don't be so lazy, do the right thing.  
> 
> It's not a question of laziness, it was a conscious decision: why add 
> unnecessary bloat and complexity for a case that probably will never 
> occur? I can certainly change it if you think it's worth while of course.

It is. And you're okay bloating the code with dead-code, but not with
implementing this in order to avoid singletons when it clearly
shouldn't be?

> 
> > > +
> > > +/* The timing setup is expected to come via DT. We keep some default timings
> > > + * here for reference, based on a 100 MHz reference clock.
> > > + */
> > > +
> > > +static const struct nfc_timings default_mode0_pll_enabled = {
> > > +	0x0d151533, 0x000b0515, 0x00000046,
> > > +	0x00150000, 0x00000000, 0x00000005, 0x00000015 };  
> > 
> > Can you explain those magic values?  
> 
> Not really, the problem is that the agreement we have with the IP vendor 
> is that we may not disclose any documentation, outside of what is 
> absolutely necessary to write working code.
> 
> A rationale is that anyone else wanting to use the driver will either be 
> designing their own SoC in which case they will have access to the 
> relevant documentation, or if they're using a SoC from someone else, the 
> SoC vendor will have to provide that information in order for the chip to 
> be useful anyway.

Hm, so I'll have a new table like that for each new SoC using this IP?
I must say I don't like the idea, but let's address the other aspects
first.

> 
> > > +
> > > +/**** Utility routines. */  
> > 
> > Please use regular comments: /* */  
> 
> Ugh. Yes, sorry.
> 
> > > +
> > > +/* Count the number of 0's in buff up to a max of max_bits */
> > > +/* Used to determine how many bitflips there are in an allegedly erased block */
> > > +static int count_zero_bits(uint8_t *buff, int size, int max_bits)
> > > +{
> > > +	int k, zero_bits = 0;
> > > +
> > > +	for (k = 0; k < size; k++) {
> > > +		zero_bits += hweight8(~buff[k]);
> > > +		if (zero_bits > max_bits)
> > > +			break;
> > > +	}
> > > +
> > > +	return zero_bits;
> > > +}  
> > 
> > We have an helper for that [1].  
> 
> Great, I'll use that. (I don't think it existed when the first version of 
> this driver was written).
> 
> > > +
> > > +/* Read and write NFC SFR registers */
> > > +
> > > +static uint32_t nfc_read(uint reg_offset)
> > > +{
> > > +	return readl_relaxed(nfc_info->regbase + reg_offset);
> > > +}
> > > +
> > > +static void nfc_write(uint32_t data, uint reg_offset)
> > > +{
> > > +	/* Note: According to NANDFLASH-CTRL Design Specification, rev 1.14,
> > > +	 * p19, the NFC SFR's can only be written when STATUS.CTRL_STAT is 0.
> > > +	 * However, this doesn't seem to be an issue in practice.
> > > +	 */
> > > +	writel_relaxed(data, nfc_info->regbase  + reg_offset);
> > > +}  
> > 
> > Do you really want to use the _relaxed functions?  
> 
> Yes, using _relaxed, together with explicit memory barriers in the DMA 
> routines has shown a 5% increase in flash read performance on an ARM 
> system, the reason being that on an ARM system the implicit memory barrier 
> in the writel() call causes a fairly heavy penalty in terms of flushing 
> the L2 cache.

Ok, as long as you know what you're doing, I'm fine with it.

> 
> > > +	res = request_irq(irq, nfc_irq, 0, "evatronix-nand", nfc_info);  
> > 
> > devm_?  
> 
> Yes, good catch, thanks!
> 
> > To be honest, your driver seems really complicated compared to what
> > it's supposed to do, and I suspect it could be a lot simpler, but
> > again, maybe I'm wrong.
> > 
> > If you didn't try yet, please investigate the ->cmd_ctrl() approach,
> > and if you did, could you explain why it didn't work out?  
> 
> Given that the controller does not have the transparency that the 
> ->cmd_ctrl() approach requires, as noted above, I can't see how it could   
> be simplified.

I'm not totally convinced, but I'll have to go through all the macros
into more details to be sure.

> 
> I basically need to grab everything needed for a given operation and 
> interpret it before handing it over to the controller. I considered using 
> a higher level API, by replacing the default ->cmdfunc() (default 
> nand_command/nand_command_lp) with a specific version, which would have 
> avoided the need to interpret the NAND commands arriving via ->cmd_ctrl(), 
> but that meant duplicating some of the logic in nand_base.c which seemed 
> like a bad idea.

Yes. As said above, I'm planning to rework the NAND framework to
support things like:

struct nand_operation {
	u8 cmds[2];
	u8 addrs[5];
	int ncmds;
	int naddrs;
	union {
		void *out;
		const void *in;
	};
	enum nand_op_direction dir;
}

->exec_op(struct nand_operation *op);

This way the NAND controller would have all the necessary information
to trigger the whole operation (omitted the ECC info on purpose, to
make it clearer).

But this is not there yet, and in the meantime, if possible, I'd prefer
seeing drivers implementing the ->cmd_ctrl() function instead of
overloading the default ->cmdfunc() implementation.

Regards,

Boris

[1]http://code.bulix.org/eqd4ce-100790
[2]https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/drivers/mtd/nand/sunxi_nand.c?id=refs/tags/v4.7-rc2#n517

-- 
Boris Brezillon, Free Electrons
Embedded Linux and Kernel engineering
http://free-electrons.com

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 3/4] mtd: nand: Add support for Evatronix NANDFLASH-CTRL
  2016-06-09  8:19   ` Ricard Wanderlof
  2016-06-09  9:08     ` Boris Brezillon
@ 2016-06-09 17:24     ` Mychaela Falconia
  2016-06-09 18:01       ` Boris Brezillon
  1 sibling, 1 reply; 18+ messages in thread
From: Mychaela Falconia @ 2016-06-09 17:24 UTC (permalink / raw)
  To: Ricard Wanderlof
  Cc: Boris Brezillon, devicetree, Tony Lindgren, linux-kernel,
	Oleksij Rempel, Linux mtd, Benoit Cousson, Brian Norris,
	David Woodhouse

On 6/9/16, Ricard Wanderlof <ricard.wanderlof@axis.com> wrote:
> The designers of this IP apparantly did not have Linux in mind when they
> designed the controller, since it does all the low level stuff
> autonomously (in the right IP configuration it can even remap flash blocks
> transparently), with no way of intervening. For instance, when doing
> hardware ECC, the OOB data is not available anywhere to the user, and if
> one wants to actually read it a separate OOB write needs to be done. I
> think the target market for the IP is really a general real time OS where
> there is no NAND driver available, and you just want to fire off a single
> high level command, wait for an interrupt, and have your data waiting for
> you.

I expect to see more and more newer NAND flash controllers that are
like this. The one I am working with (FTNANDC024 from Faraday) is like
this too - very very high-level.

> This latter property is actually advantageous in Linux too as the driver
> doesn't have to do bit- and byte-banging against the NAND flash. I'm not
> sure what the gain in overhead is in practice, but at any rate there's not
> much of a choice.

It should be advantageous to any OS that uses the abstractions
provided by the hardware instead of fighting them. The problem is that
the Linux MTD system's current idea of what a NAND controller should
look like is now out of sync with the new hardware realities.

> Given that the controller does not have the transparency that the
> ->cmd_ctrl() approach requires, as noted above, I can't see how it could
> be simplified.
>
> I basically need to grab everything needed for a given operation and
> interpret it before handing it over to the controller. I considered using
> a higher level API, by replacing the default ->cmdfunc() (default
> nand_command/nand_command_lp) with a specific version, which would have
> avoided the need to interpret the NAND commands arriving via ->cmd_ctrl(),
> but that meant duplicating some of the logic in nand_base.c which seemed
> like a bad idea.

For my FTNANDC024 driver I went for an ever more radical approach: I
decided to forego the "nand" layer in Linux entirely and attach my
driver directly to the MTD layer. There is very little that
nand_base.c provides that is useful to a high-level controller whose
abstractions are "read these logical sectors", "write these logical
sectors" and "erase these blocks", it is really only useful for the
simpler NAND controllers that don't do all of the heavy lifting in
hardware.

And it is NOT a question of "optimization" - the problem is not that
going through the paradigm imposed by nand_base.c precludes the use of
some optional higher-performance features of smart controllers -
instead the controllers which you and I are working with *require* the
use of their highly abstracted interfaces, and *do not* provide any
kind of raw or transparent pass-through mode.

M~

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 3/4] mtd: nand: Add support for Evatronix NANDFLASH-CTRL
  2016-06-09 17:24     ` Mychaela Falconia
@ 2016-06-09 18:01       ` Boris Brezillon
  2016-06-09 19:35         ` Mychaela Falconia
  0 siblings, 1 reply; 18+ messages in thread
From: Boris Brezillon @ 2016-06-09 18:01 UTC (permalink / raw)
  To: Mychaela Falconia
  Cc: Ricard Wanderlof, devicetree, Tony Lindgren, linux-kernel,
	Oleksij Rempel, Linux mtd, Benoit Cousson, Brian Norris,
	David Woodhouse

On Thu, 9 Jun 2016 09:24:19 -0800
Mychaela Falconia <mychaela.falconia@gmail.com> wrote:

> On 6/9/16, Ricard Wanderlof <ricard.wanderlof@axis.com> wrote:
> > The designers of this IP apparantly did not have Linux in mind when they
> > designed the controller, since it does all the low level stuff
> > autonomously (in the right IP configuration it can even remap flash blocks
> > transparently), with no way of intervening. For instance, when doing
> > hardware ECC, the OOB data is not available anywhere to the user, and if
> > one wants to actually read it a separate OOB write needs to be done. I
> > think the target market for the IP is really a general real time OS where
> > there is no NAND driver available, and you just want to fire off a single
> > high level command, wait for an interrupt, and have your data waiting for
> > you.  
> 
> I expect to see more and more newer NAND flash controllers that are
> like this. The one I am working with (FTNANDC024 from Faraday) is like
> this too - very very high-level.

Hm, I'm not so sure. I've seen a lot of recent drivers that still
provide this low-level interface, while providing means to optimize
things if the user care about implementing software support for it.

And AFAICT, these 'high-level controllers' are not a new thing.

> 
> > This latter property is actually advantageous in Linux too as the driver
> > doesn't have to do bit- and byte-banging against the NAND flash. I'm not
> > sure what the gain in overhead is in practice, but at any rate there's not
> > much of a choice.  
> 
> It should be advantageous to any OS that uses the abstractions
> provided by the hardware instead of fighting them.

True.

> The problem is that
> the Linux MTD system's current idea of what a NAND controller should
> look like is now out of sync with the new hardware realities.

I also agree on this aspect. Though what you consider an evolution with
these 'high-level' controllers is in my opinion a regression.

By supporting only a subset of what NAND chips actually support, and
preventing any raw access, you just limit the compatibility of the NAND
controller with rather old NAND chips. For example, your controller
cannot deal with MLC/TLC NANDs because it just can't send the private
commands required to have reliable support for these NANDs.

> 
> > Given that the controller does not have the transparency that the  
> > ->cmd_ctrl() approach requires, as noted above, I can't see how it could  
> > be simplified.
> >
> > I basically need to grab everything needed for a given operation and
> > interpret it before handing it over to the controller. I considered using
> > a higher level API, by replacing the default ->cmdfunc() (default
> > nand_command/nand_command_lp) with a specific version, which would have
> > avoided the need to interpret the NAND commands arriving via ->cmd_ctrl(),
> > but that meant duplicating some of the logic in nand_base.c which seemed
> > like a bad idea.  
> 
> For my FTNANDC024 driver I went for an ever more radical approach: I
> decided to forego the "nand" layer in Linux entirely and attach my
> driver directly to the MTD layer. There is very little that
> nand_base.c provides that is useful to a high-level controller whose
> abstractions are "read these logical sectors", "write these logical
> sectors" and "erase these blocks", it is really only useful for the
> simpler NAND controllers that don't do all of the heavy lifting in
> hardware.

Yep, except what you call simple controllers are actually not
simpler than yours, they just provide a raw interface in addition to
their advanced/high-level logic in order to let the software support
features that were not supported when the controller IP was designed.

This is IMO a much saner design than limiting the interface to higher
level abstraction, which are likely to be compatible with only a subset
of all the NAND devices available out there.

> 
> And it is NOT a question of "optimization" - the problem is not that
> going through the paradigm imposed by nand_base.c precludes the use of
> some optional higher-performance features of smart controllers -
> instead the controllers which you and I are working with *require* the
> use of their highly abstracted interfaces, and *do not* provide any
> kind of raw or transparent pass-through mode.

I understand this constraint, and I know some controllers are really
like this, but before deciding to move those controllers to some
'high-level NAND' framework, I'd like to make sure they are really not
providing this raw interface.

I've been told many times that NAND controllers were not supporting raw
accesses (disabling the ECC engine when accessing the NAND), and most
of the time it was false, but the developers just didn't care about
supporting this feature, and things like that make the whole subsystem
unmaintainable.

I fear the same will happen with this high-level interface: once we'll
add it, people will just decide to re-implement everything on their
own, and support only the set of feature they need. And we'll end-up
with another set of unmaintainable drivers.

So my answer is yes, I'm okay providing this high level NAND controller
framework, but I'd like to make sure drivers going in there are not
abusing it, just because it's simpler to implement a driver for their
specific use-case. And given the CMD registers exposed by the Evatronix
IP I had the feeling that this low level interface was available...

-- 
Boris Brezillon, Free Electrons
Embedded Linux and Kernel engineering
http://free-electrons.com

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 3/4] mtd: nand: Add support for Evatronix NANDFLASH-CTRL
  2016-06-09 18:01       ` Boris Brezillon
@ 2016-06-09 19:35         ` Mychaela Falconia
  2016-06-09 20:23           ` Boris Brezillon
  0 siblings, 1 reply; 18+ messages in thread
From: Mychaela Falconia @ 2016-06-09 19:35 UTC (permalink / raw)
  To: Boris Brezillon
  Cc: Ricard Wanderlof, devicetree, Tony Lindgren, linux-kernel,
	Oleksij Rempel, Linux mtd, Benoit Cousson, Brian Norris,
	David Woodhouse

On 6/9/16, Boris Brezillon <boris.brezillon@free-electrons.com> wrote:
> I also agree on this aspect. Though what you consider an evolution with
> these 'high-level' controllers is in my opinion a regression.
>
> By supporting only a subset of what NAND chips actually support, and
> preventing any raw access, you just limit the compatibility of the NAND
> controller with rather old NAND chips. For example, your controller
> cannot deal with MLC/TLC NANDs because it just can't send the private
> commands required to have reliable support for these NANDs.

I am not the one who designed the controller IP, so please don't shoot
the messenger. I fully agree that it would have been much safer if
they just bothered to add a couple of registers to their IP that
provide a raw bypass directly to the NAND interface pins - and it
would have been trivial to do so, adding just a few transistors to the
silicon - but apparently the IP vendor chose not to. I agree that it
was a poor decision on their part, but as Linux geeks we don't get the
power to change the IP we got hired to write drivers for.

As for private commands which the IP does not know about, there is a
way to send them, but it is not a truly raw way. With FTNANDC024 if
you need to send a private command to the NAND, you have to write some
custom FTNANDC024 microcode for it. The datasheet explains how to
write your own microcode, and the IP includes a small SRAM where you
can put your own microcode in addition to the mask ROM with the
standard microcode. But this provision still would not allow a
low-level driver implementation: there is still no way to implement a
driver function that just sends any arbitrary command opcode without
caring what it is, instead you would have to write a different special
microcode routine for each non-standard command you need to support,
and naturally the microcode SRAM has a finite size.

> I understand this constraint, and I know some controllers are really
> like this, but before deciding to move those controllers to some
> 'high-level NAND' framework, I'd like to make sure they are really not
> providing this raw interface.

I would be glad to share the datasheet for the controller I am working
with, so you can verify with your own eyes that it does not provide
any truly raw interface - but I don't have a place to post it. If you
like, I can send it to you as an off-list email attachment (1775641
bytes), and you can then repost it somewhere for others to see.

> I've been told many times that NAND controllers were not supporting raw
> accesses (disabling the ECC engine when accessing the NAND), and most
> of the time it was false, but the developers just didn't care about
> supporting this feature, and things like that make the whole subsystem
> unmaintainable.

With FTNANDC024 the closest you can get to a raw read are the
following two features:

1. You can disable the ECC engine and read the first "data size" bytes
of the page raw, but it does not get you the OOB area. For example, on
the Micron SLC chip I am working with currently, the raw page size is
4096+224 bytes, but the FTNANDC024 can only read the 4096 "data" bytes
this way, and not the remaining 224.

2. There is a "byte read" command that performs a truly raw read of
any range of bytes (any column address) within a page, but it can only
read a maximum of 32 bytes per operation.

If I wanted to do a truly raw dump of a full NAND page with my
controller and NAND chip (like I did when I wanted to reverse-engineer
their hardware BCH implementation), I would have to issue one read
command to get the first 4096 bytes, then another 7 "byte read"
commands to get the remaining 224 bytes. 8 read commands in total to
get a raw dump of one full NAND page, and each of those 8 FTNANDC024
read commands internally involves sending a new Read Page command to
the physical NAND - thus any read disturbs are invoked 8 times per
page instead of once.

It's even worse with raw writes - if I wanted to do a raw write of a
full NAND page, I would have to do it in 8 separate write commands
just like with the raw read. I assume that the NAND chip won't like it
at all, as it would get 8 separate Page Program commands for the same
page with different column addresses.

M~

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 3/4] mtd: nand: Add support for Evatronix NANDFLASH-CTRL
  2016-06-09 19:35         ` Mychaela Falconia
@ 2016-06-09 20:23           ` Boris Brezillon
  2016-06-10  5:07             ` Mychaela Falconia
  2016-06-10 14:22             ` Ricard Wanderlof
  0 siblings, 2 replies; 18+ messages in thread
From: Boris Brezillon @ 2016-06-09 20:23 UTC (permalink / raw)
  To: Mychaela Falconia, Ricard Wanderlof
  Cc: devicetree, Tony Lindgren, linux-kernel, Oleksij Rempel,
	Linux mtd, Benoit Cousson, Brian Norris, David Woodhouse

On Thu, 9 Jun 2016 11:35:53 -0800
Mychaela Falconia <mychaela.falconia@gmail.com> wrote:

> On 6/9/16, Boris Brezillon <boris.brezillon@free-electrons.com> wrote:
> > I also agree on this aspect. Though what you consider an evolution with
> > these 'high-level' controllers is in my opinion a regression.
> >
> > By supporting only a subset of what NAND chips actually support, and
> > preventing any raw access, you just limit the compatibility of the NAND
> > controller with rather old NAND chips. For example, your controller
> > cannot deal with MLC/TLC NANDs because it just can't send the private
> > commands required to have reliable support for these NANDs.  
> 
> I am not the one who designed the controller IP, so please don't shoot
> the messenger.

Yes, sorry, I was just over-reacting because I'm tired earing that the
only solution to get a high performances is to hide everything in the
controller which is likely to be incompatible with NANDs we'll see in a
few month from there.

> I fully agree that it would have been much safer if
> they just bothered to add a couple of registers to their IP that
> provide a raw bypass directly to the NAND interface pins - and it
> would have been trivial to do so, adding just a few transistors to the
> silicon - but apparently the IP vendor chose not to. I agree that it
> was a poor decision on their part, but as Linux geeks we don't get the
> power to change the IP we got hired to write drivers for.

Hm, I think it's changing now that a lot of SoCs are advertised to be
running Linux. But you're right in that existing IPs might not support
this low-level mode.

> 
> As for private commands which the IP does not know about, there is a
> way to send them, but it is not a truly raw way. With FTNANDC024 if
> you need to send a private command to the NAND, you have to write some
> custom FTNANDC024 microcode for it. The datasheet explains how to
> write your own microcode, and the IP includes a small SRAM where you
> can put your own microcode in addition to the mask ROM with the
> standard microcode. But this provision still would not allow a
> low-level driver implementation: there is still no way to implement a
> driver function that just sends any arbitrary command opcode without
> caring what it is, instead you would have to write a different special
> microcode routine for each non-standard command you need to support,
> and naturally the microcode SRAM has a finite size.

Hm, I don't understand why it's not possible to implement basic
sequences, but I don't know anything about FTNANDC024, so let's assume
you're right.

> 
> > I understand this constraint, and I know some controllers are really
> > like this, but before deciding to move those controllers to some
> > 'high-level NAND' framework, I'd like to make sure they are really not
> > providing this raw interface.  
> 
> I would be glad to share the datasheet for the controller I am working
> with, so you can verify with your own eyes that it does not provide
> any truly raw interface - but I don't have a place to post it. If you
> like, I can send it to you as an off-list email attachment (1775641
> bytes), and you can then repost it somewhere for others to see.

Sure, feel free to send it to me, I'll have a look. And maybe you can
also share your code (both the new and old versions of the driver).

> 
> > I've been told many times that NAND controllers were not supporting raw
> > accesses (disabling the ECC engine when accessing the NAND), and most
> > of the time it was false, but the developers just didn't care about
> > supporting this feature, and things like that make the whole subsystem
> > unmaintainable.  
> 
> With FTNANDC024 the closest you can get to a raw read are the
> following two features:

The raw vs ECC mode thing was just an example to illustrate my point:
people usually lie (intentionally or not) about what's really
supported :).

> 
> 1. You can disable the ECC engine and read the first "data size" bytes
> of the page raw, but it does not get you the OOB area. For example, on
> the Micron SLC chip I am working with currently, the raw page size is
> 4096+224 bytes, but the FTNANDC024 can only read the 4096 "data" bytes
> this way, and not the remaining 224.

Yes, that's a problem.

> 
> 2. There is a "byte read" command that performs a truly raw read of
> any range of bytes (any column address) within a page, but it can only
> read a maximum of 32 bytes per operation.

Raw access is usually not something you expect to be fast, it's here to
help people debugging their implementation, or providing a fallback
when ECC correction failed (which should not happen that often).
So it's fine if it's providing fast.

> 
> If I wanted to do a truly raw dump of a full NAND page with my
> controller and NAND chip (like I did when I wanted to reverse-engineer
> their hardware BCH implementation), I would have to issue one read
> command to get the first 4096 bytes, then another 7 "byte read"
> commands to get the remaining 224 bytes. 8 read commands in total to
> get a raw dump of one full NAND page, and each of those 8 FTNANDC024
> read commands internally involves sending a new Read Page command to
> the physical NAND - thus any read disturbs are invoked 8 times per
> page instead of once.

Hm, so you can't even move the column pointer within a page
(NAND_CMD_RNDOUT)?
Even if it's the case, as I said, raw access is mainly here for
debugging purpose, so the read-disturbance caused by the multiple page
load operations shouldn't be a problem.

> 
> It's even worse with raw writes - if I wanted to do a raw write of a
> full NAND page, I would have to do it in 8 separate write commands
> just like with the raw read. I assume that the NAND chip won't like it
> at all, as it would get 8 separate Page Program commands for the same
> page with different column addresses.

Nope, this one won't work. That's completely crazy to prevent one from
doing such basic things, but given you previous explanation I'm not
really surprised.

Now back to the Evatronix IP. I had a closer look at Ricard's code, and
it seems the controller is actually supporting a low-level mode
(command seq 18 and 19 + MAKE_GEN_CMD()).

So my suggestion is to implement ->cmd_ctrl() and use these generic
commands. And of course optimized/packed accesses (using specific
commands) can be used in ecc->read/write_page(). This would require a
flag to ask the core to not send the READ/WRITE commands before calling
ecc->read/write_page(), but that's doable.
All other commands would use the generic mode, since they don't require
high performances or any specific handling.

This way you don't have to implement your own ->cmdfunc() function, you
can get rid of all the specific ID/ONFI detection logic (the generic
commands should allow you to retrieve those information) and rely on the
default implementation.

Ricard, would that work?


-- 
Boris Brezillon, Free Electrons
Embedded Linux and Kernel engineering
http://free-electrons.com

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 3/4] mtd: nand: Add support for Evatronix NANDFLASH-CTRL
  2016-06-09 20:23           ` Boris Brezillon
@ 2016-06-10  5:07             ` Mychaela Falconia
  2016-06-10 12:40               ` Boris Brezillon
  2016-06-10 14:22             ` Ricard Wanderlof
  1 sibling, 1 reply; 18+ messages in thread
From: Mychaela Falconia @ 2016-06-10  5:07 UTC (permalink / raw)
  To: Boris Brezillon
  Cc: Ricard Wanderlof, devicetree, Tony Lindgren, linux-kernel,
	Oleksij Rempel, Linux mtd, Benoit Cousson, Brian Norris,
	David Woodhouse

On 6/9/16, Boris Brezillon <boris.brezillon@free-electrons.com> wrote:
> Hm, I think it's changing now that a lot of SoCs are advertised to be
> running Linux. But you're right in that existing IPs might not support
> this low-level mode.

Faraday (the IP vendor in the present case) advertise Linux support as
well, but they never mainlined any of it, and instead they provide
their own vendor Linux trees. The one I got is based on linux-3.3; I
don't know if they have a newer one. They do "support" FTNANDC024
under Linux as well, but their driver for it is gawdawful - see below.

> Hm, I don't understand why it's not possible to implement basic
> sequences, but I don't know anything about FTNANDC024, so let's assume
> you're right.

Read the datasheet (link below) and tell me what you think.

> Sure, feel free to send it to me, I'll have a look. And maybe you can
> also share your code (both the new and old versions of the driver).

I decided to go ahead and abuse my personal web space on another
(nothing to do with Linux or with NAND flash) project's server for the
purpose of sharing this stuff:

https://www.freecalypso.org/members/falcon/linux-mtd/

There you will find the IP datasheet, the vendor's driver (GPL), and a
current snapshot of my work-in-progress replacement.

> Hm, so you can't even move the column pointer within a page
> (NAND_CMD_RNDOUT)?

See the FTNANDC024 microcode listings on datasheet PDF pages 108
through 117. Every FTNANDC024 operation is an execution of one of
these complete microcode routines from start to finish. Just because a
given microcode flow includes the issuance of a given NAND command
(such as Change Read Column or Change Write Column) does not mean that
you could just ask the controller to issue that command by itself,
without executing a complete microcode flow which also includes the
Read Page or Program Page command.

The only workaround would be to write our own microcode. I think this
approach would actually work: we could write shorter microcode
routines which *just* issue a given NAND opcode and then stop there,
and another separate microcode routine (to be invoked via a separate
command) which would only do what they call "RD_SP" or "WR_SP" (raw
byte transfers of 1 to 32 bytes), without issuing a Read Page command
before or a Program Page command after. This approach would allow us
to perform truly raw page reads and writes, but it would be very ugly
and inefficient. It would also require a separate microcode routine
for each different command, NOT one generic microcode routine that
would correspond to ->cmd_ctrl() or ->cmdfunc().

M~

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 3/4] mtd: nand: Add support for Evatronix NANDFLASH-CTRL
  2016-06-10  5:07             ` Mychaela Falconia
@ 2016-06-10 12:40               ` Boris Brezillon
  2016-06-12 16:08                 ` Boris Brezillon
  0 siblings, 1 reply; 18+ messages in thread
From: Boris Brezillon @ 2016-06-10 12:40 UTC (permalink / raw)
  To: Mychaela Falconia
  Cc: Ricard Wanderlof, devicetree, Tony Lindgren, linux-kernel,
	Oleksij Rempel, Linux mtd, Benoit Cousson, Brian Norris,
	David Woodhouse

On Thu, 9 Jun 2016 21:07:49 -0800
Mychaela Falconia <mychaela.falconia@gmail.com> wrote:

> On 6/9/16, Boris Brezillon <boris.brezillon@free-electrons.com> wrote:
> > Hm, I think it's changing now that a lot of SoCs are advertised to be
> > running Linux. But you're right in that existing IPs might not support
> > this low-level mode.  
> 
> Faraday (the IP vendor in the present case) advertise Linux support as
> well, but they never mainlined any of it, and instead they provide
> their own vendor Linux trees. The one I got is based on linux-3.3; I
> don't know if they have a newer one. They do "support" FTNANDC024
> under Linux as well, but their driver for it is gawdawful - see below.
> 
> > Hm, I don't understand why it's not possible to implement basic
> > sequences, but I don't know anything about FTNANDC024, so let's assume
> > you're right.  
> 
> Read the datasheet (link below) and tell me what you think.
> 
> > Sure, feel free to send it to me, I'll have a look. And maybe you can
> > also share your code (both the new and old versions of the driver).  
> 
> I decided to go ahead and abuse my personal web space on another
> (nothing to do with Linux or with NAND flash) project's server for the
> purpose of sharing this stuff:
> 
> https://www.freecalypso.org/members/falcon/linux-mtd/
> 
> There you will find the IP datasheet, the vendor's driver (GPL), and a
> current snapshot of my work-in-progress replacement.

Thanks for sharing that. That's actually a quite interesting beast, and
it's way more evolved than I first thought.

I might be wrong, but it seems that ->cmd_ctrl() can be supported using
the custom NAND op approach (custom uCode in SRAM).

This doesn't prevent you from optimizing things for operations where
performances really matter (read/write with ECC), by using advanced
sequencing (reusing the supported cmdset, or even implementing your
own). I'm actually impressed by the degree of liberty this controller
gives: while some sequences are provided you can create you own ones
and still benefit from the controller optimizations.

Didn't look at the code yet, but I'm pretty confident we'll be able to
make the driver fit in the existing model, and that moving to the new
model (where I plan to give more freedom to the controller), will make
it even more interesting.

I'll try to come with a proposal for you to test/review after reviewing
the code.

> 
> > Hm, so you can't even move the column pointer within a page
> > (NAND_CMD_RNDOUT)?  
> 
> See the FTNANDC024 microcode listings on datasheet PDF pages 108
> through 117. Every FTNANDC024 operation is an execution of one of
> these complete microcode routines from start to finish. Just because a
> given microcode flow includes the issuance of a given NAND command
> (such as Change Read Column or Change Write Column) does not mean that
> you could just ask the controller to issue that command by itself,
> without executing a complete microcode flow which also includes the
> Read Page or Program Page command.

True.

> 
> The only workaround would be to write our own microcode. I think this
> approach would actually work: we could write shorter microcode
> routines which *just* issue a given NAND opcode and then stop there,
> and another separate microcode routine (to be invoked via a separate
> command) which would only do what they call "RD_SP" or "WR_SP" (raw
> byte transfers of 1 to 32 bytes), without issuing a Read Page command
> before or a Program Page command after. This approach would allow us
> to perform truly raw page reads and writes, but it would be very ugly
> and inefficient. It would also require a separate microcode routine
> for each different command, NOT one generic microcode routine that
> would correspond to ->cmd_ctrl() or ->cmdfunc().

Let's see if we can do something smarter.
Note that my proposal was to bypass ->cmd_ctrl() usage for path that
are requiring high-perfs (ecc->read/write_page()), and only rely on
if for the other operations, where performances are not important, but
re-usability of existing code is (I'm thinking of NAND detection here).

-- 
Boris Brezillon, Free Electrons
Embedded Linux and Kernel engineering
http://free-electrons.com

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 3/4] mtd: nand: Add support for Evatronix NANDFLASH-CTRL
  2016-06-09 20:23           ` Boris Brezillon
  2016-06-10  5:07             ` Mychaela Falconia
@ 2016-06-10 14:22             ` Ricard Wanderlof
  2016-06-10 16:07               ` Boris Brezillon
  1 sibling, 1 reply; 18+ messages in thread
From: Ricard Wanderlof @ 2016-06-10 14:22 UTC (permalink / raw)
  To: Boris Brezillon
  Cc: Mychaela Falconia, devicetree, Tony Lindgren, linux-kernel,
	Oleksij Rempel, Linux mtd, Benoit Cousson, Brian Norris,
	David Woodhouse


On Thu, 9 Jun 2016, Boris Brezillon wrote:

> > >
> > > By supporting only a subset of what NAND chips actually support, and
> > > preventing any raw access, you just limit the compatibility of the NAND
> > > controller with rather old NAND chips. For example, your controller
> > > cannot deal with MLC/TLC NANDs because it just can't send the private
> > > commands required to have reliable support for these NANDs.  
> > 
> > I am not the one who designed the controller IP, so please don't shoot
> > the messenger.
> 
> Yes, sorry, I was just over-reacting because I'm tired earing that the
> only solution to get a high performances is to hide everything in the
> controller which is likely to be incompatible with NANDs we'll see in a
> few month from there.

I don't know what the situation is with other NAND drivers and controller 
IP's, but in my case, the set of NAND flashes which the SoC in which the 
Evatronix IP is included is intended to operate with is fairly limited, 
partly because our products don't require a great veriety, and partly for 
SoC verification reasons (the fewer flash types tested, the less time and 
money, essentially). So the mindset from the outset was 'we need to 
support these flashes, can we do it', rather than 'we want a general 
driver', which in the end is reflected in the somewhat limited set of 
flash types initially supported by the driver.

I fully understand that the opposite is true of the Linux kernel, which is 
intended to be as general as possible, I'm just trying to offer an 
explanation for the somewhat limited scope of driver developers, 
especially those working on company time, the understanding of which 
eventually might lead to use finding ways to solve this dilemma.

FWIW, in the Evatronix case, it wasn't any performance issue that drove 
the driver development, it just seemed like the Right Thing to Do.

> > > I've been told many times that NAND controllers were not supporting raw
> > > accesses (disabling the ECC engine when accessing the NAND), and most
> > > of the time it was false, but the developers just didn't care about
> > > supporting this feature, and things like that make the whole subsystem
> > > unmaintainable.  

Yeah, I've come across a driver (not in mainline though) with precisely 
this problem. The hardware supported hardware BCH ECC, but without 
returning the number of corrected bits, which made almost useless, as 
there was no way of detecting when the number of bits in an ECC block was 
nearing the limit for a read failure. The solution was to implement 
software ECC, but the driver didn't really support this mode to start with 
and it had to be added.

(FWIW, the Evatronix driver does support both hardware and software ECC, 
the latter mostly intended for verification and debugging purposes).

> Now back to the Evatronix IP. I had a closer look at Ricard's code, and
> it seems the controller is actually supporting a low-level mode
> (command seq 18 and 19 + MAKE_GEN_CMD()).

Yes, that's true, it is labelled as a 'generic command sequence' in the 
document. Well, it's not really a low level mode, as you can see you still 
need to do the whole operation in one go, but in the end that is what it 
accomplishes.

> So my suggestion is to implement ->cmd_ctrl() and use these generic
> commands. And of course optimized/packed accesses (using specific
> commands) can be used in ecc->read/write_page().

Actually, the use of ECC or not is governed outside the IP command set. I 
already use the generic command sequence (SEQ18/19) for ECC reads and 
writes towards flashes with 4 byte addresses. So it should be doable to 
use the generic command sequencer for any number of address bytes, both 
with and without ECC.

> This would require a flag to ask the core to not send the READ/WRITE 
> commands before calling ecc->read/write_page(), but that's doable.

Ok, so a change required in the core to get this to work then. I've tended 
to avoid writing driver code so that it requires changes to the framwork 
unless absolutely necessary, as the changes tend to be rather specific and 
clutter up the general code (which, honestly, is bad enough as it is, not 
trying to blame anyone here, just an observation), and can usually be 
resolved in the driver with a bit of ingenuity.

> All other commands would use the generic mode, since they don't require
> high performances or any specific handling.
> 
> This way you don't have to implement your own ->cmdfunc() function, you
> can get rid of all the specific ID/ONFI detection logic (the generic
> commands should allow you to retrieve those information)

FWIW, there isn't much ID detection logic, although looking at it some of 
the comments imply that there is (and that should be changed of course), 
because the ID type byte is actually identical to what the controller uses 
to select the relevant type.

> and rely on the default implementation.
> 
> Ricard, would that work?

The main reason the I've been using the ->cmdfunc() interface is that the 
API is on a fairly high level ("here's a sequence of address bytes", "read 
a page", etc) which is on similar level to the API to the actual IP (i.e.  
"read a page from this address").

In contrast, using the ->cmd_ctrl() interface means that I've got to 
interpret a sequence of bytes coming in as multiple function calls. It 
just seemed like a bad idea compared to interpreting a set of high level 
commands, given that the controller also has a high level API. It seemed a 
bit like a roundabout way letting someone (i.e. nand_command) encode a 
high level command into several low level operations, which must then be 
deciphered by the flash driver one by one in order to assemble another 
high level command. It seemed much more direct to process the high level 
commands directly - and easier to read, as the required operations are 
directly visible as macros. The ->cmd_ctrl() interface is fine for 
byte-banging an 8-bit I/O port as that was what it was designed for, but 
quite simply seemed to be the wrong level for anything more advanced than 
that. Sortof akin to reading a file with getc() rather than read(), which 
is why I never really considered it.

But I can definitely see your point, especially as maintainer with the 
goal of supporting as many devices as possible, and also considering your 
plans (as you mentioned in another reply) to rework the API, which would 
mean that the ->cmdfunc() API would be changing, with the associated 
changes in drivers that use that API.

Looking at the documentation, it does look doable, but to a certain extent 
it's in the category "can't really tell until I've tried it". In the worst 
case, some operations would still need to use specific IP commands but 
they should be few.

It's a fairly extensive rewrite though, as a lot of the internal logic of 
the driver is based on the external API being a high level, even if the 
code in the end will be simpler.

The company I work for has an explicit goal of getting as much of the 
Linux port for our SoC upstream, so hopefully I can find time to rewrite 
this in the near future, although I'm off on a fairly long summer vacation 
shortly. I'll try to get it underway as soon as I'm back.

Something that I am mildly miffed about is that I've posted this driver 
twice before on the mtd list (although, admittedly, not directly addressed 
to any maintainer), first as an RFC and later as a complete patch, without 
a single response. (Apparently Boris you did respond with comments on 
Oleksij's driver though which I must have missed). Although an RFC might 
not have initiated a detailed review, it would have been a large advantage 
(and wasted less time for all) if the point of using 'wrong' driver 
interface had been brought up and consequently discussed earlier. Yes I 
know, everyone is busy, it's easy to miss things, each and every driver 
can't be reviewed in detail, etc.

/Ricard
-- 
Ricard Wolf Wanderlöf                           ricardw(at)axis.com
Axis Communications AB, Lund, Sweden            www.axis.com
Phone +46 46 272 2016                           Fax +46 46 13 61 30

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 3/4] mtd: nand: Add support for Evatronix NANDFLASH-CTRL
  2016-06-09  9:08     ` Boris Brezillon
@ 2016-06-10 14:40       ` Ricard Wanderlof
  2016-06-10 15:34         ` Boris Brezillon
  0 siblings, 1 reply; 18+ messages in thread
From: Ricard Wanderlof @ 2016-06-10 14:40 UTC (permalink / raw)
  To: Boris Brezillon
  Cc: devicetree, Tony Lindgren, linux-kernel, Oleksij Rempel,
	Linux mtd, Benoit Cousson, Brian Norris, David Woodhouse


On Thu, 9 Jun 2016, Boris Brezillon wrote:

> > > >  drivers/mtd/nand/Kconfig          |    6 +
> > > >  drivers/mtd/nand/Makefile         |    1 +
> > > >  drivers/mtd/nand/evatronix_nand.c | 1909 +++++++++++++++++++++++++++++++++++++
> > > >  3 files changed, 1916 insertions(+)
> > > >  create mode 100644 drivers/mtd/nand/evatronix_nand.c  
> > > 
> > > Please run checkpatch.pl and fix all the ERRORS and WARNINGS.  
> > 
> > I did run checkpatch.pl (at least the one in the mtd l2 tree), and there 
> > should be no outstanding errors. Some of the warnings are related to lines 
> > ...
> > 
> 
> Well, I'm not asking to fix all 80 chars warnings, but I see a lot of
> warnings and errors in there [1], and I'm pretty sure most of them can
> be addressed in a sane way.

Whoa, something must have gone seriously wrong when I mailed out the 
patch. When I run checkpatch.pl locally on the patch file, I have 0 errors 
and 10 warnings (not 128 errors and 65 warnings).

I tired mailing the patch to myself, extracting it from the incoming 
email, and comparing with the original patch file and they were identical. 
I must check up if something has gone amiss with our external email here, 
as we haven't had problems before. I'll need to look into this. I would 
certainly not submit a patch with any checkpatch.pl errors unless they 
were really false positives.

> > > > +#undef POLLED_XFERS /* Use polled rather than interrupt based transfers */
> > > > +
> > > > +#undef CLEAR_DMA_BUF_AFTER_WRITE /* Useful for debugging */  
> > > 
> > [ ... ]
> > The rationale for leaving this code in is that it can help bring up a new 
> > unknown system with this IP, because I consider the most likely re-use of 
> > this driver to be for someone who is developing a new SoC, as it seems 
> > rather uncommon in commercially available chips.
> 
> That's not how it works. Either you provide a sane way to activate
> these options (using Kconfig entries), or your drop dead-code sections.
> 
> I understand that debugging is important to you, but adding hundreds
> lines of unused code is also hurting readability, so I keep thinking
> that these options should either be removed (with the associated code
> sections) or exposed as Kconfig options.

Ok, I'll give it some consideration and fix it either way.

> > > > +/* DMA buffer for page transfers. */
> > > > +#define DMA_BUF_SIZE (8192 + 640) /* main + spare for 8k page flash */  
> > > 
> > > This should clearly be dynamic.  
> > 
> > 8k pages are the largest the controller can handle, and there's only a 
> > [  ... ]
> 
> If you reject all NAND above 8k + 640 oob bytes I'm fine with this
> fixed size.

Ok, I see what you're getting at. Yes, there needs to be a check for that, 
somewhere.

> > > I'm pretty sure you can implement ->cmd_ctrl() and rely on the default
> > > cmdfunc() logic instead of manually converting those high-level NAND
> > > commands into your own model (which seems to match pretty much the  
> > > ->cmd_ctrl() model, where you can get the number of address and command  
> > > cycles).
> > > [ ... ]
> 
> Just to be clear, you don't have to toggle the pins each time
> ->cmd_ctrl() is called, you can cache the operations and launch it once
> it says it's dones (don't remember the flag).
>

I've omitted comments to this here as the discussion has been carried on 
in the sub thread started by Mychaela.

> > > > +/* Information common to all chips, including the NANDFLASH-CTRL IP */
> > > > +struct nfc_info {  
> > > 
> > > Should inherit from nand_hw_ctrl (see the sunxi_nand driver)...
> > >   
> > > > +	void __iomem *regbase;
> > > > +	struct device *dev;
> > > > +	struct nand_hw_control *controller;  
> > > 
> > > ... and not point to it.  
> > 
> > Ok. I'm a bit unsure what you mean by 'inherit' though; in the sunxi 
> > driver the struct nand_hw_controller is contained within the struct 
> > sunxi_nfc, in my case there's a pointer to a single instance of a 
> > nand_hw_control. I agree that my approach is wasteful on a dynamic 
> > allocation and I have no problems changing it, but conceptually there's 
> > not much of a difference.
> 
> There's a huge different. By embedding the nand_hw_control struct into
> your nfc_info object you allow things like:
> 
> static int get_nfc(struct nand_chip *chip)
> {
> 	return container_of(chip->controller, struct nfc_info,
> 			    controller);
> }
> 
> This way you can retrieve the nfc_info object attached to the nand_chip.

Yes, of course ... or rather,

static int get_nfc(struct nand_hw_control *controller)

I see new what you mean by inherit. It all comes down to if struct 
nfc_info is a specialized type of struct nand_hw_control, or if it just 
refers to it. I had assumed it was the latter that was the paradigm.

In the driver in question it makes no practical difference as there is no 
need to go from a nand_hw_control to an nfc_info (in fact, the 
nand_hw_control is nevery really used explicitly, it tags along, only used 
by the framework).

Still, I'm not trying to make an argument here, just trying to understand 
what the underlying paradigm is. I'll move it inside as it clearly is 
better in several respects.

> > > > +
> > > > +/* This is a global pointer, as we only support one single instance of the NFC.
> > > > + * For multiple instances, we would need to add nfc_info as a parameter to
> > > > + * several functions, as well as adding it as a member of the chip_info struct.
> > > > + * Since most likely a system would only have one NFC instance, we don't
> > > > + * go all the way implementing that feature now.
> > > > + */
> > > > +static struct nfc_info *nfc_info;  
> > > 
> > > Come on! Don't be so lazy, do the right thing.  
> > 
> > It's not a question of laziness, it was a conscious decision: why add 
> > unnecessary bloat and complexity for a case that probably will never 
> > occur? I can certainly change it if you think it's worth while of course.
> 
> It is. And you're okay bloating the code with dead-code, but not with
> implementing this in order to avoid singletons when it clearly
> shouldn't be?

I'm ok with bloating the code with something which I consider may be 
useful, but I have reservations bloating it with something which I don't 
think will ever be used (and which could be added if the need arises 
later) and furthermore is not possible to test and verify properly (as 
there is in fact only one NAND controller on the platform on which I can 
test this).

But I'm fine with adding it, I'm not really trying to knock it, just 
explaining why it wasn't done in the first place. (I think I'm actually 
reacting to the word 'lazy' here...).

> > > > +
> > > > +/* The timing setup is expected to come via DT. We keep some default timings
> > > > + * here for reference, based on a 100 MHz reference clock.
> > > > + */
> > > > +
> > > > +static const struct nfc_timings default_mode0_pll_enabled = {
> > > > +	0x0d151533, 0x000b0515, 0x00000046,
> > > > +	0x00150000, 0x00000000, 0x00000005, 0x00000015 };  
> > > 
> > > Can you explain those magic values?  
> > 
> > Not really, the problem is that the agreement we have with the IP vendor 
> > is that we may not disclose any documentation, outside of what is 
> > absolutely necessary to write working code.
> > 
> > A rationale is that anyone else wanting to use the driver will either be 
> > designing their own SoC in which case they will have access to the 
> > relevant documentation, or if they're using a SoC from someone else, the 
> > SoC vendor will have to provide that information in order for the chip to 
> > be useful anyway.
> 
> Hm, so I'll have a new table like that for each new SoC using this IP?

Yes, I would say that would be the case.

> I must say I don't like the idea, but let's address the other aspects
> first.

Ok.

> As said above, I'm planning to rework the NAND framework to
> support things like:
> 
> struct nand_operation {
> 	u8 cmds[2];
> 	u8 addrs[5];
> 	int ncmds;
> 	int naddrs;
> 	union {
> 		void *out;
> 		const void *in;
> 	};
> 	enum nand_op_direction dir;
> }
> 
> ->exec_op(struct nand_operation *op);
> 
> This way the NAND controller would have all the necessary information
> to trigger the whole operation (omitted the ECC info on purpose, to
> make it clearer).
> 
> But this is not there yet, and in the meantime, if possible, I'd prefer
> seeing drivers implementing the ->cmd_ctrl() function instead of
> overloading the default ->cmdfunc() implementation.

I see, I suppose that's because during the course of this the ->cmdfunc() 
logic will be significantly changed, requiring corresponding changes in 
drivers that do overload that function? Fair enough, that's a pretty good 
reason, probably more so than the alleged simplicity of the ->cmd_ctrl() 
interface.

/Ricard
-- 
Ricard Wolf Wanderlöf                           ricardw(at)axis.com
Axis Communications AB, Lund, Sweden            www.axis.com
Phone +46 46 272 2016                           Fax +46 46 13 61 30

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 3/4] mtd: nand: Add support for Evatronix NANDFLASH-CTRL
  2016-06-10 14:40       ` Ricard Wanderlof
@ 2016-06-10 15:34         ` Boris Brezillon
  2016-06-10 16:00           ` Ricard Wanderlof
  0 siblings, 1 reply; 18+ messages in thread
From: Boris Brezillon @ 2016-06-10 15:34 UTC (permalink / raw)
  To: Ricard Wanderlof
  Cc: devicetree, Tony Lindgren, linux-kernel, Oleksij Rempel,
	Linux mtd, Benoit Cousson, Brian Norris, David Woodhouse

On Fri, 10 Jun 2016 16:40:39 +0200
Ricard Wanderlof <ricard.wanderlof@axis.com> wrote:
> 
> > > > > +/* Information common to all chips, including the NANDFLASH-CTRL IP */
> > > > > +struct nfc_info {    
> > > > 
> > > > Should inherit from nand_hw_ctrl (see the sunxi_nand driver)...
> > > >     
> > > > > +	void __iomem *regbase;
> > > > > +	struct device *dev;
> > > > > +	struct nand_hw_control *controller;    
> > > > 
> > > > ... and not point to it.    
> > > 
> > > Ok. I'm a bit unsure what you mean by 'inherit' though; in the sunxi 
> > > driver the struct nand_hw_controller is contained within the struct 
> > > sunxi_nfc, in my case there's a pointer to a single instance of a 
> > > nand_hw_control. I agree that my approach is wasteful on a dynamic 
> > > allocation and I have no problems changing it, but conceptually there's 
> > > not much of a difference.  
> > 
> > There's a huge different. By embedding the nand_hw_control struct into
> > your nfc_info object you allow things like:
> > 
> > static int get_nfc(struct nand_chip *chip)
> > {
> > 	return container_of(chip->controller, struct nfc_info,
> > 			    controller);
> > }
> > 
> > This way you can retrieve the nfc_info object attached to the nand_chip.  
> 
> Yes, of course ... or rather,
> 
> static int get_nfc(struct nand_hw_control *controller)

Actually I meant

static inline struct nfc_info *get_nfc(struct nand_chip *chip) ...

> 
> I see new what you mean by inherit. It all comes down to if struct 
> nfc_info is a specialized type of struct nand_hw_control, or if it just 
> refers to it. I had assumed it was the latter that was the paradigm.
> 
> In the driver in question it makes no practical difference as there is no 
> need to go from a nand_hw_control to an nfc_info (in fact, the 
> nand_hw_control is nevery really used explicitly, it tags along, only used 
> by the framework).
> 
> Still, I'm not trying to make an argument here, just trying to understand 
> what the underlying paradigm is. I'll move it inside as it clearly is 
> better in several respects.

The goal here is to retrieve the controller attached to a given chip in
order to avoid the global nfc_info variable (and abusing
nand_get/set_controller_data() to store a pointer to the controller is
not a good idea either: it's supposed to be used to store per-chip
private data).

> 
> > > > > +
> > > > > +/* This is a global pointer, as we only support one single instance of the NFC.
> > > > > + * For multiple instances, we would need to add nfc_info as a parameter to
> > > > > + * several functions, as well as adding it as a member of the chip_info struct.
> > > > > + * Since most likely a system would only have one NFC instance, we don't
> > > > > + * go all the way implementing that feature now.
> > > > > + */
> > > > > +static struct nfc_info *nfc_info;    
> > > > 
> > > > Come on! Don't be so lazy, do the right thing.    
> > > 
> > > It's not a question of laziness, it was a conscious decision: why add 
> > > unnecessary bloat and complexity for a case that probably will never 
> > > occur? I can certainly change it if you think it's worth while of course.  
> > 
> > It is. And you're okay bloating the code with dead-code, but not with
> > implementing this in order to avoid singletons when it clearly
> > shouldn't be?  
> 
> I'm ok with bloating the code with something which I consider may be 
> useful, but I have reservations bloating it with something which I don't 
> think will ever be used (and which could be added if the need arises 
> later) and furthermore is not possible to test and verify properly (as 
> there is in fact only one NAND controller on the platform on which I can 
> test this).
> 
> But I'm fine with adding it, I'm not really trying to knock it, just 
> explaining why it wasn't done in the first place. (I think I'm actually 
> reacting to the word 'lazy' here...).

Then, let's say I really care about this clear separation between NAND
controllers and NAND chips (even if the controller is only supporting a
single device), because it makes things clearer, and because it brings
some consistency in the NAND controller drivers.
That's something I've asked to other contributors, and I'm asking it to
you too.

You'll see that implementing this separation is not much more
complicated than having this global variable, and I must admit global
variable make me scream (especially when they can be avoided).

> 
> > > > > +
> > > > > +/* The timing setup is expected to come via DT. We keep some default timings
> > > > > + * here for reference, based on a 100 MHz reference clock.
> > > > > + */
> > > > > +
> > > > > +static const struct nfc_timings default_mode0_pll_enabled = {
> > > > > +	0x0d151533, 0x000b0515, 0x00000046,
> > > > > +	0x00150000, 0x00000000, 0x00000005, 0x00000015 };    
> > > > 
> > > > Can you explain those magic values?    
> > > 
> > > Not really, the problem is that the agreement we have with the IP vendor 
> > > is that we may not disclose any documentation, outside of what is 
> > > absolutely necessary to write working code.
> > > 
> > > A rationale is that anyone else wanting to use the driver will either be 
> > > designing their own SoC in which case they will have access to the 
> > > relevant documentation, or if they're using a SoC from someone else, the 
> > > SoC vendor will have to provide that information in order for the chip to 
> > > be useful anyway.  
> > 
> > Hm, so I'll have a new table like that for each new SoC using this IP?  
> 
> Yes, I would say that would be the case.
> 
> > I must say I don't like the idea, but let's address the other aspects
> > first.  
> 
> Ok.
> 
> > As said above, I'm planning to rework the NAND framework to
> > support things like:
> > 
> > struct nand_operation {
> > 	u8 cmds[2];
> > 	u8 addrs[5];
> > 	int ncmds;
> > 	int naddrs;
> > 	union {
> > 		void *out;
> > 		const void *in;
> > 	};
> > 	enum nand_op_direction dir;
> > }
> >   
> > ->exec_op(struct nand_operation *op);  
> > 
> > This way the NAND controller would have all the necessary information
> > to trigger the whole operation (omitted the ECC info on purpose, to
> > make it clearer).
> > 
> > But this is not there yet, and in the meantime, if possible, I'd prefer
> > seeing drivers implementing the ->cmd_ctrl() function instead of
> > overloading the default ->cmdfunc() implementation.  
> 
> I see, I suppose that's because during the course of this the ->cmdfunc() 
> logic will be significantly changed, requiring corresponding changes in 
> drivers that do overload that function? Fair enough, that's a pretty good 
> reason, probably more so than the alleged simplicity of the ->cmd_ctrl() 
> interface.

There's another reason actually. We have chip specific functions (like
->setup_read_retry()) which might want to use
non-standard/vendor-specific operations, and this implies patching all
->cmdfunc() implementations, or at least making sure they will work
fine with these new commands.
The ->cmd_ctrl() + generic nand_command_lp() for ->cmdfunc() is making
that a lot easier.

So yes, I'm clearly trying to avoid specific ->cmdfunc() (especially
when they are not generic enough to support new commands).

Again, ->cmd_ctrl() does not have to be used in your internal
ecc->read/write_page() implementations (all you'll have to do is avoid
using the ->cmdfunc() method and create your own NAND controller
specific commands instead), but it should at least be used for basic
operations that do not require high performances (i.e. NAND detection,
NAND RESET, read-retry, ...).

-- 
Boris Brezillon, Free Electrons
Embedded Linux and Kernel engineering
http://free-electrons.com

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 3/4] mtd: nand: Add support for Evatronix NANDFLASH-CTRL
  2016-06-10 15:34         ` Boris Brezillon
@ 2016-06-10 16:00           ` Ricard Wanderlof
  0 siblings, 0 replies; 18+ messages in thread
From: Ricard Wanderlof @ 2016-06-10 16:00 UTC (permalink / raw)
  To: Boris Brezillon
  Cc: devicetree, Tony Lindgren, linux-kernel, Oleksij Rempel,
	Linux mtd, Benoit Cousson, Brian Norris, David Woodhouse


On Fri, 10 Jun 2016, Boris Brezillon wrote:

> The goal here is to retrieve the controller attached to a given chip in
> order to avoid the global nfc_info variable (and abusing
> nand_get/set_controller_data() to store a pointer to the controller is
> not a good idea either: it's supposed to be used to store per-chip
> private data).

> Then, let's say I really care about this clear separation between NAND
> controllers and NAND chips (even if the controller is only supporting a
> single device), because it makes things clearer, and because it brings
> some consistency in the NAND controller drivers.
> That's something I've asked to other contributors, and I'm asking it to
> you too.

Certainly. As I said, I'm not trying to get away from doing it if there's 
any motivation for it which there clearly is.

> You'll see that implementing this separation is not much more
> complicated than having this global variable, and I must admit global
> variable make me scream (especially when they can be avoided).

Yes, I see now. With struct nand_hw_control in the proper place it allows 
a translation from a struct nand_chip * to a future non-global struct 
nfc_info * . Yeah, I would probably have stumbled upon this when rewriting 
it.
 
> > > But this is not there yet, and in the meantime, if possible, I'd prefer
> > > seeing drivers implementing the ->cmd_ctrl() function instead of
> > > overloading the default ->cmdfunc() implementation.  
> > 
> > I see, I suppose that's because during the course of this the ->cmdfunc() 
> > logic will be significantly changed, requiring corresponding changes in 
> > drivers that do overload that function? Fair enough, that's a pretty good 
> > reason, probably more so than the alleged simplicity of the ->cmd_ctrl() 
> > interface.
> 
> There's another reason actually. We have chip specific functions (like
> ->setup_read_retry()) which might want to use
> non-standard/vendor-specific operations, and this implies patching all
> ->cmdfunc() implementations, or at least making sure they will work
> fine with these new commands.

I see. 

> So yes, I'm clearly trying to avoid specific ->cmdfunc() (especially
> when they are not generic enough to support new commands).

Yes, makes sense. 

> Again, ->cmd_ctrl() does not have to be used in your internal
> ecc->read/write_page() implementations (all you'll have to do is avoid
> using the ->cmdfunc() method and create your own NAND controller
> specific commands instead), but it should at least be used for basic
> operations that do not require high performances (i.e. NAND detection,
> NAND RESET, read-retry, ...).

Ok, good.

/Ricard
-- 
Ricard Wolf Wanderlöf                           ricardw(at)axis.com
Axis Communications AB, Lund, Sweden            www.axis.com
Phone +46 46 272 2016                           Fax +46 46 13 61 30

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 3/4] mtd: nand: Add support for Evatronix NANDFLASH-CTRL
  2016-06-10 14:22             ` Ricard Wanderlof
@ 2016-06-10 16:07               ` Boris Brezillon
  2016-06-10 16:51                 ` Ricard Wanderlof
  2016-06-13  7:19                 ` Ricard Wanderlof
  0 siblings, 2 replies; 18+ messages in thread
From: Boris Brezillon @ 2016-06-10 16:07 UTC (permalink / raw)
  To: Ricard Wanderlof
  Cc: Mychaela Falconia, devicetree, Tony Lindgren, linux-kernel,
	Oleksij Rempel, Linux mtd, Benoit Cousson, Brian Norris,
	David Woodhouse

On Fri, 10 Jun 2016 16:22:38 +0200
Ricard Wanderlof <ricard.wanderlof@axis.com> wrote:

> On Thu, 9 Jun 2016, Boris Brezillon wrote:
> 
> > > >
> > > > By supporting only a subset of what NAND chips actually support, and
> > > > preventing any raw access, you just limit the compatibility of the NAND
> > > > controller with rather old NAND chips. For example, your controller
> > > > cannot deal with MLC/TLC NANDs because it just can't send the private
> > > > commands required to have reliable support for these NANDs.    
> > > 
> > > I am not the one who designed the controller IP, so please don't shoot
> > > the messenger.  
> > 
> > Yes, sorry, I was just over-reacting because I'm tired earing that the
> > only solution to get a high performances is to hide everything in the
> > controller which is likely to be incompatible with NANDs we'll see in a
> > few month from there.  
> 
> I don't know what the situation is with other NAND drivers and controller 
> IP's, but in my case, the set of NAND flashes which the SoC in which the 
> Evatronix IP is included is intended to operate with is fairly limited, 
> partly because our products don't require a great veriety, and partly for 
> SoC verification reasons (the fewer flash types tested, the less time and 
> money, essentially). So the mindset from the outset was 'we need to 
> support these flashes, can we do it', rather than 'we want a general 
> driver', which in the end is reflected in the somewhat limited set of 
> flash types initially supported by the driver.
> 
> I fully understand that the opposite is true of the Linux kernel, which is 
> intended to be as general as possible, I'm just trying to offer an 
> explanation for the somewhat limited scope of driver developers, 
> especially those working on company time, the understanding of which 
> eventually might lead to use finding ways to solve this dilemma.
> 
> FWIW, in the Evatronix case, it wasn't any performance issue that drove 
> the driver development, it just seemed like the Right Thing to Do.
> 
> > > > I've been told many times that NAND controllers were not supporting raw
> > > > accesses (disabling the ECC engine when accessing the NAND), and most
> > > > of the time it was false, but the developers just didn't care about
> > > > supporting this feature, and things like that make the whole subsystem
> > > > unmaintainable.    
> 
> Yeah, I've come across a driver (not in mainline though) with precisely 
> this problem. The hardware supported hardware BCH ECC, but without 
> returning the number of corrected bits, which made almost useless, as 
> there was no way of detecting when the number of bits in an ECC block was 
> nearing the limit for a read failure. The solution was to implement 
> software ECC, but the driver didn't really support this mode to start with 
> and it had to be added.
> 
> (FWIW, the Evatronix driver does support both hardware and software ECC, 
> the latter mostly intended for verification and debugging purposes).
> 
> > Now back to the Evatronix IP. I had a closer look at Ricard's code, and
> > it seems the controller is actually supporting a low-level mode
> > (command seq 18 and 19 + MAKE_GEN_CMD()).  
> 
> Yes, that's true, it is labelled as a 'generic command sequence' in the 
> document. Well, it's not really a low level mode, as you can see you still 
> need to do the whole operation in one go, but in the end that is what it 
> accomplishes.
> 
> > So my suggestion is to implement ->cmd_ctrl() and use these generic
> > commands. And of course optimized/packed accesses (using specific
> > commands) can be used in ecc->read/write_page().  
> 
> Actually, the use of ECC or not is governed outside the IP command set. I 
> already use the generic command sequence (SEQ18/19) for ECC reads and 
> writes towards flashes with 4 byte addresses. So it should be doable to 
> use the generic command sequencer for any number of address bytes, both 
> with and without ECC.
> 
> > This would require a flag to ask the core to not send the READ/WRITE 
> > commands before calling ecc->read/write_page(), but that's doable.  
> 
> Ok, so a change required in the core to get this to work then. I've tended 
> to avoid writing driver code so that it requires changes to the framwork 
> unless absolutely necessary, as the changes tend to be rather specific and 
> clutter up the general code (which, honestly, is bad enough as it is, not 
> trying to blame anyone here, just an observation), and can usually be 
> resolved in the driver with a bit of ingenuity.

No offense, that's my feeling too, hence the various reworks I've
recently initiated. Getting rid of the ->cmd_ctrl() ->cmdfunc()
approach in favor of a more generic ->exec_op() approach is one of them,
just need some time to figure a way to do it smoothly.

> 
> > All other commands would use the generic mode, since they don't require
> > high performances or any specific handling.
> > 
> > This way you don't have to implement your own ->cmdfunc() function, you
> > can get rid of all the specific ID/ONFI detection logic (the generic
> > commands should allow you to retrieve those information)  
> 
> FWIW, there isn't much ID detection logic, although looking at it some of 
> the comments imply that there is (and that should be changed of course), 
> because the ID type byte is actually identical to what the controller uses 
> to select the relevant type.
> 
> > and rely on the default implementation.
> > 
> > Ricard, would that work?  
> 
> The main reason the I've been using the ->cmdfunc() interface is that the 
> API is on a fairly high level ("here's a sequence of address bytes", "read 
> a page", etc) which is on similar level to the API to the actual IP (i.e.  
> "read a page from this address").
> 
> In contrast, using the ->cmd_ctrl() interface means that I've got to 
> interpret a sequence of bytes coming in as multiple function calls. It 
> just seemed like a bad idea compared to interpreting a set of high level 
> commands, given that the controller also has a high level API. It seemed a 
> bit like a roundabout way letting someone (i.e. nand_command) encode a 
> high level command into several low level operations, which must then be 
> deciphered by the flash driver one by one in order to assemble another 
> high level command. It seemed much more direct to process the high level 
> commands directly - and easier to read, as the required operations are 
> directly visible as macros. The ->cmd_ctrl() interface is fine for 
> byte-banging an 8-bit I/O port as that was what it was designed for, but 
> quite simply seemed to be the wrong level for anything more advanced than 
> that. Sortof akin to reading a file with getc() rather than read(), which 
> is why I never really considered it.
> 
> But I can definitely see your point, especially as maintainer with the 
> goal of supporting as many devices as possible, and also considering your 
> plans (as you mentioned in another reply) to rework the API, which would 
> mean that the ->cmdfunc() API would be changing, with the associated 
> changes in drivers that use that API.
> 
> Looking at the documentation, it does look doable, but to a certain extent 
> it's in the category "can't really tell until I've tried it". In the worst 
> case, some operations would still need to use specific IP commands but 
> they should be few.
> 
> It's a fairly extensive rewrite though, as a lot of the internal logic of 
> the driver is based on the external API being a high level, even if the 
> code in the end will be simpler.
> 
> The company I work for has an explicit goal of getting as much of the 
> Linux port for our SoC upstream, so hopefully I can find time to rewrite 
> this in the near future, although I'm off on a fairly long summer vacation 
> shortly. I'll try to get it underway as soon as I'm back.
> 
> Something that I am mildly miffed about is that I've posted this driver 
> twice before on the mtd list (although, admittedly, not directly addressed 
> to any maintainer), first as an RFC and later as a complete patch, without 
> a single response. (Apparently Boris you did respond with comments on 
> Oleksij's driver though which I must have missed). Although an RFC might 
> not have initiated a detailed review, it would have been a large advantage 
> (and wasted less time for all) if the point of using 'wrong' driver 
> interface had been brought up and consequently discussed earlier. Yes I 
> know, everyone is busy, it's easy to miss things, each and every driver 
> can't be reviewed in detail, etc.

Yes, I know. I was not maintainer at that time, and was only reviewing
a few patches from time to time.
Now that I am, I try to answer as fast as possible. Note that the set
of requirements might have change with me, so even if someone add
agreed on you first submission but not taken the patches, I would have
required the same set of changes ;).



-- 
Boris Brezillon, Free Electrons
Embedded Linux and Kernel engineering
http://free-electrons.com

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 3/4] mtd: nand: Add support for Evatronix NANDFLASH-CTRL
  2016-06-10 16:07               ` Boris Brezillon
@ 2016-06-10 16:51                 ` Ricard Wanderlof
  2016-06-13  7:19                 ` Ricard Wanderlof
  1 sibling, 0 replies; 18+ messages in thread
From: Ricard Wanderlof @ 2016-06-10 16:51 UTC (permalink / raw)
  To: Boris Brezillon
  Cc: Mychaela Falconia, devicetree, Tony Lindgren, linux-kernel,
	Oleksij Rempel, Linux mtd, Benoit Cousson, Brian Norris,
	David Woodhouse


On Fri, 10 Jun 2016, Boris Brezillon wrote:

> > Something that I am mildly miffed about is that I've posted this driver 
> > twice before on the mtd list (although, admittedly, not directly addressed 
> > to any maintainer), first as an RFC and later as a complete patch, without 
> > a single response.
> > ...
> 
> Yes, I know. I was not maintainer at that time, and was only reviewing
> a few patches from time to time.
> Now that I am, I try to answer as fast as possible. Note that the set
> of requirements might have change with me, so even if someone add
> agreed on you first submission but not taken the patches, I would have
> required the same set of changes ;).

:-) 

At any rate, thanks for taking the time to review the driver so promptly 
and also the for the time you've spent during the subsequent discussions.

/Ricard
-- 
Ricard Wolf Wanderlöf                           ricardw(at)axis.com
Axis Communications AB, Lund, Sweden            www.axis.com
Phone +46 46 272 2016                           Fax +46 46 13 61 30

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 3/4] mtd: nand: Add support for Evatronix NANDFLASH-CTRL
  2016-06-10 12:40               ` Boris Brezillon
@ 2016-06-12 16:08                 ` Boris Brezillon
  0 siblings, 0 replies; 18+ messages in thread
From: Boris Brezillon @ 2016-06-12 16:08 UTC (permalink / raw)
  To: Mychaela Falconia
  Cc: Ricard Wanderlof, devicetree, Tony Lindgren, linux-kernel,
	Oleksij Rempel, Linux mtd, Benoit Cousson, Brian Norris,
	David Woodhouse

Hi Mychaela,

On Fri, 10 Jun 2016 14:40:00 +0200
Boris Brezillon <boris.brezillon@free-electrons.com> wrote:

> On Thu, 9 Jun 2016 21:07:49 -0800
> Mychaela Falconia <mychaela.falconia@gmail.com> wrote:
> 
> > On 6/9/16, Boris Brezillon <boris.brezillon@free-electrons.com> wrote:  
> > > Hm, I think it's changing now that a lot of SoCs are advertised to be
> > > running Linux. But you're right in that existing IPs might not support
> > > this low-level mode.    
> > 
> > Faraday (the IP vendor in the present case) advertise Linux support as
> > well, but they never mainlined any of it, and instead they provide
> > their own vendor Linux trees. The one I got is based on linux-3.3; I
> > don't know if they have a newer one. They do "support" FTNANDC024
> > under Linux as well, but their driver for it is gawdawful - see below.
> >   
> > > Hm, I don't understand why it's not possible to implement basic
> > > sequences, but I don't know anything about FTNANDC024, so let's assume
> > > you're right.    
> > 
> > Read the datasheet (link below) and tell me what you think.
> >   
> > > Sure, feel free to send it to me, I'll have a look. And maybe you can
> > > also share your code (both the new and old versions of the driver).    
> > 
> > I decided to go ahead and abuse my personal web space on another
> > (nothing to do with Linux or with NAND flash) project's server for the
> > purpose of sharing this stuff:
> > 
> > https://www.freecalypso.org/members/falcon/linux-mtd/
> > 
> > There you will find the IP datasheet, the vendor's driver (GPL), and a
> > current snapshot of my work-in-progress replacement.  
> 
> Thanks for sharing that. That's actually a quite interesting beast, and
> it's way more evolved than I first thought.
> 
> I might be wrong, but it seems that ->cmd_ctrl() can be supported using
> the custom NAND op approach (custom uCode in SRAM).
> 
> This doesn't prevent you from optimizing things for operations where
> performances really matter (read/write with ECC), by using advanced
> sequencing (reusing the supported cmdset, or even implementing your
> own). I'm actually impressed by the degree of liberty this controller
> gives: while some sequences are provided you can create you own ones
> and still benefit from the controller optimizations.
> 
> Didn't look at the code yet, but I'm pretty confident we'll be able to
> make the driver fit in the existing model, and that moving to the new
> model (where I plan to give more freedom to the controller), will make
> it even more interesting.
> 
> I'll try to come with a proposal for you to test/review after reviewing
> the code.

Okay, I had a closer look at both implementations, and indeed Faraday's
implementation was trying to convert different NAND operations into
their associated 'faraday' program flow (referenced by program
address), which not only complicates the implementation, but also
implies adapting the ->cmdfunc() logic each time a new feature is added
to the core.
In the other hand, your driver, while exposing an higher level
interface, lacks all the detection logic that is part of the code
(actually I haven't looked to deeply in the code, but even if the
detection logic is here, this means you're more or less duplicating the
nand_base.c code).

So, my suggestion is to still use the NAND framework, but try to be
smarter to avoid bloating the ->cmdfunc() implementation. Here is a
draft showing the direction (it's not been tested at all, and should
serve a reference rather than a real implementation).
Of course this implementation is not perfect, but it should still
provide basic support for your NAND controller until we come with a
NAND framework v2 allowing for more optimizations at the NAND level.

Don't hesitate to ask questions or point problems in my approach.

Best Regards,

Boris

[1]http://code.bulix.org/kxg9dz-101056

-- 
Boris Brezillon, Free Electrons
Embedded Linux and Kernel engineering
http://free-electrons.com

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 3/4] mtd: nand: Add support for Evatronix NANDFLASH-CTRL
  2016-06-10 16:07               ` Boris Brezillon
  2016-06-10 16:51                 ` Ricard Wanderlof
@ 2016-06-13  7:19                 ` Ricard Wanderlof
  1 sibling, 0 replies; 18+ messages in thread
From: Ricard Wanderlof @ 2016-06-13  7:19 UTC (permalink / raw)
  To: Boris Brezillon
  Cc: Mychaela Falconia, devicetree, Tony Lindgren, linux-kernel,
	Oleksij Rempel, Linux mtd, Benoit Cousson, Brian Norris,
	David Woodhouse


On Fri, 10 Jun 2016, Boris Brezillon wrote:

> > Something that I am mildly miffed about is that I've posted this driver 
> > twice before on the mtd list (although, admittedly, not directly addressed 
> > to any maintainer), first as an RFC and later as a complete patch, without 
> > a single response. (Apparently Boris you did respond with comments on 
> > Oleksij's driver though which I must have missed). Although an RFC might 
> > not have initiated a detailed review, it would have been a large advantage 
> > (and wasted less time for all) if the point of using 'wrong' driver 
> > interface had been brought up and consequently discussed earlier. Yes I 
> > know, everyone is busy, it's easy to miss things, each and every driver 
> > can't be reviewed in detail, etc.
> 
> Yes, I know. I was not maintainer at that time, and was only reviewing
> a few patches from time to time.
> Now that I am, I try to answer as fast as possible. Note that the set
> of requirements might have change with me, so even if someone add
> agreed on you first submission but not taken the patches, I would have
> required the same set of changes ;).

:-)

Well, I'm not questioning that changes are required, and I'm happy to 
implement them once I've come to a clear understanding of what is needed, 
I just would have liked to known about them earlier, especially the major 
concepts.

I know there was probably no one interested enough at the time, everyone 
is busy, not every idea presented can be reviewed in detail etc etc. And 
it's not a big issue, just a minor annoyance.

/Ricard
-- 
Ricard Wolf Wanderlöf                           ricardw(at)axis.com
Axis Communications AB, Lund, Sweden            www.axis.com
Phone +46 46 272 2016                           Fax +46 46 13 61 30

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2016-06-13  7:19 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-06-02  7:48 [PATCH 3/4] mtd: nand: Add support for Evatronix NANDFLASH-CTRL Ricard Wanderlof
2016-06-03 14:59 ` Boris Brezillon
2016-06-09  8:19   ` Ricard Wanderlof
2016-06-09  9:08     ` Boris Brezillon
2016-06-10 14:40       ` Ricard Wanderlof
2016-06-10 15:34         ` Boris Brezillon
2016-06-10 16:00           ` Ricard Wanderlof
2016-06-09 17:24     ` Mychaela Falconia
2016-06-09 18:01       ` Boris Brezillon
2016-06-09 19:35         ` Mychaela Falconia
2016-06-09 20:23           ` Boris Brezillon
2016-06-10  5:07             ` Mychaela Falconia
2016-06-10 12:40               ` Boris Brezillon
2016-06-12 16:08                 ` Boris Brezillon
2016-06-10 14:22             ` Ricard Wanderlof
2016-06-10 16:07               ` Boris Brezillon
2016-06-10 16:51                 ` Ricard Wanderlof
2016-06-13  7:19                 ` Ricard Wanderlof

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).